diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config index 4268a26422791cdfd05569cd3e73038e775669cf..d5b9de64db607ff1fc7c005f1727bde912550ae9 100644 --- a/arch/arm64/configs/tencent.config +++ b/arch/arm64/configs/tencent.config @@ -117,6 +117,7 @@ CONFIG_ACPI_APEI_MEMORY_FAILURE=y CONFIG_ACPI_APEI_EINJ=m CONFIG_ACPI_APEI_ERST_DEBUG=m CONFIG_ACPI_PFRUT=m +CONFIG_ACPI_POWER_NOTIFIER_CHAIN=y CONFIG_ACPI_AGDI=y CONFIG_HAVE_KVM_PINNED_VMID=y CONFIG_VIRTUALIZATION=y @@ -1804,6 +1805,10 @@ CONFIG_ARM_SMMU_V3_HTTU=y CONFIG_ARM_SMMU_V3_ECMDQ=y CONFIG_ARM_SMMU_V3_IOMMUFD=y +CONFIG_UDFI=y +CONFIG_UDFI_CIS=m +CONFIG_UDFI_ODF=m + # # unified bus # @@ -1840,6 +1845,13 @@ CONFIG_UB_UBMEM_UMMU=y CONFIG_UB_UMMU_BYPASSDEV=y # end of UMMU +CONFIG_OBMM=m + +# UB sentry +CONFIG_UB_SENTRY=m +CONFIG_UB_SENTRY_REMOTE=m +# end of UB sentry + # URMA CONFIG_UB_URMA=m diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 648228831f5e875c02b25bea49fcc7d7ce4dbc5b..fdae6368097568a0a16443861d38182fdbcc9e45 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -181,6 +181,13 @@ config ACPI_BUTTON To compile this driver as a module, choose M here: the module will be called button. +config ACPI_POWER_NOTIFIER_CHAIN + bool "enable acpi power notifier chain" + depends on ACPI_BUTTON && ARM64 + default n + help + Say Y here to enable acpi power notifier chain. + config ACPI_TINY_POWER_BUTTON tristate "Tiny Power Button Driver" depends on !ACPI_BUTTON diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index c760e38df981f3abdba48ef31a6b285f5760341c..2394ad368d6b6a6fd4ac307ebe9ea947d427a3b8 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -436,6 +436,22 @@ static void acpi_lid_notify(acpi_handle handle, u32 event, void *data) acpi_lid_update_state(device, true); } +#if IS_ENABLED(CONFIG_ACPI_POWER_NOTIFIER_CHAIN) +static BLOCKING_NOTIFIER_HEAD(acpi_power_chain_head); + +int register_acpi_power_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&acpi_power_chain_head, nb); +} +EXPORT_SYMBOL(register_acpi_power_notifier); + +int unregister_acpi_power_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&acpi_power_chain_head, nb); +} +EXPORT_SYMBOL(unregister_acpi_power_notifier); +#endif + static void acpi_button_notify(acpi_handle handle, u32 event, void *data) { struct acpi_device *device = data; @@ -449,6 +465,13 @@ static void acpi_button_notify(acpi_handle handle, u32 event, void *data) return; } +#if IS_ENABLED(CONFIG_ACPI_POWER_NOTIFIER_CHAIN) + if (blocking_notifier_call_chain(&acpi_power_chain_head, 0, 0) == NOTIFY_BAD) { + pr_info("acpi power notifier chain: receive bad result, stop poweroff\n"); + return; + } +#endif + acpi_pm_wakeup_event(&device->dev); button = acpi_driver_data(device); diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 11e8d19658aaaa3c8727de6d73dcc365b8e089be..f970065c529075d89454e0dd3a5832040d250f27 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -314,5 +314,6 @@ source "drivers/firmware/psci/Kconfig" source "drivers/firmware/smccc/Kconfig" source "drivers/firmware/tegra/Kconfig" source "drivers/firmware/xilinx/Kconfig" +source "drivers/firmware/uvb/Kconfig" endmenu diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 28fcddcd688fc2fd0f977df39126bf251808b970..defec11828e87e9841252b51219c0830f5d537d3 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -38,3 +38,4 @@ obj-y += psci/ 
obj-y += smccc/ obj-y += tegra/ obj-y += xilinx/ +obj-$(CONFIG_UDFI) += uvb/ diff --git a/drivers/firmware/uvb/Kconfig b/drivers/firmware/uvb/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..97a69aaa686f742e9e0cff2eb73080c62a214371 --- /dev/null +++ b/drivers/firmware/uvb/Kconfig @@ -0,0 +1,31 @@ +config UDFI + bool "UDFI Drivers" + depends on ARM64 + default n + help + UBIOS Distributed Firmware Interface (UDFI) support for kernel + requires a UBIOS platform. UDFI provides communication channels + among OS, BIOS and other firmware: + (1) Call ID Service (CIS), by which OS sends a call to BIOS, can be used + to operate specific hardware, read/write BIOS information or call BIOS + functions , e.g., read/write RTC, modify boot options, etc. + (2) Notify ID Information (NII), by which OS receives notification from + BIOS, provides notification mechanism from BIOS to OS. This is useful + when events detected and BIOS needs to inform OS, e.g., RAS events. + +if UDFI + +config UDFI_CIS + tristate "CIS framework" + select UDFI_ODF + default n + help + This option is selected if CIS framework is needed. + +config UDFI_ODF + tristate "odf parse" + default n + help + This driver support UBIOS object description specification parse + +endif # UDFI diff --git a/drivers/firmware/uvb/Makefile b/drivers/firmware/uvb/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d5a261794d9e4d909a5adf8da6281fd94d4d493e --- /dev/null +++ b/drivers/firmware/uvb/Makefile @@ -0,0 +1,16 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. +# Create : 2025-04-18 +# Description : cis odf Makefile + +ccflags-y += -I$(srctree)/drivers/firmware/uvb/include + +obj-$(CONFIG_UDFI) = odf/odf_get_fdt.o +obj-$(CONFIG_UDFI_ODF) += odf.o +odf-objs := odf/odf_trans.o \ + odf/odf_data.o \ + odf/odf_helper.o + +obj-$(CONFIG_UDFI_CIS) += cis.o +cis-objs := cis/cis_info_process.o \ + cis/uvb_info_process.o \ + cis/cis_core.o diff --git a/drivers/firmware/uvb/cis/cis_core.c b/drivers/firmware/uvb/cis/cis_core.c new file mode 100644 index 0000000000000000000000000000000000000000..4aa2d858ac6708e2aee2f7c4ff6eb4f9398ce65d --- /dev/null +++ b/drivers/firmware/uvb/cis/cis_core.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: Call ID Service (CIS) core module, manages inter-process communication + * via call identifiers with local/remote handling and UVB integration. 
+ * Author: zhangrui + * Create: 2025-04-18 + */ +#define pr_fmt(fmt) "[UVB]: " fmt + +#include +#include +#include +#include +#include +#include "cis_info_process.h" +#include "uvb_info_process.h" + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Call ID Service Framework"); + +static struct task_struct *uvb_poll_window_thread; +DECLARE_HASHTABLE(uvb_lock_table, MAX_UVB_LOCK_IN_BITS); + +int create_uvb_poll_window_thread(void) +{ + uvb_poll_window_thread = kthread_run(uvb_poll_window, NULL, "uvb_poll_window_thread"); + if (IS_ERR(uvb_poll_window_thread)) { + pr_err("Failed to create uvb polling thread\n"); + return PTR_ERR(uvb_poll_window_thread); + } + + pr_info("create uvb poll window thread successfully\n"); + + return 0; +} + +void uvb_poll_window_thread_stop(void) +{ + if (uvb_poll_window_thread) { + kthread_stop(uvb_poll_window_thread); + uvb_poll_window_thread = NULL; + } +} + +static void free_uvb_window_lock(void) +{ + struct uvb_window_lock *entry; + struct hlist_node *tmp; + u32 bkt; + + if (hash_empty(uvb_lock_table)) + return; + + hash_for_each_safe(uvb_lock_table, bkt, tmp, entry, node) { + hash_del(&entry->node); + kfree(entry); + } +} + +static int uvb_window_lock_init(void) +{ + struct uvb *uvb; + struct uvb_window_lock *lock_node; + u16 i; + u16 j; + + for (i = 0; i < g_uvb_info->uvb_count; i++) { + uvb = g_uvb_info->uvbs[i]; + for (j = 0; j < uvb->window_count; j++) { + lock_node = kzalloc(sizeof(struct uvb_window_lock), GFP_KERNEL); + if (!lock_node) { + free_uvb_window_lock(); + return -ENOMEM; + } + lock_node->lock.counter = 0; + lock_node->window_address = uvb->wd[j].address; + hash_add(uvb_lock_table, &lock_node->node, uvb->wd[j].address); + } + } + pr_info("uvb window lock init success.\n"); + + return 0; +} + +int init_uvb(void) +{ + int err = 0; + + if (!g_uvb_info) { + pr_err("uvb is invalid, please try to use smc\n"); + return -EOPNOTSUPP; + } + + err = uvb_window_lock_init(); + if (err) { + pr_err("Init uvb window lock failed\n"); + return err; + } + + err = create_uvb_poll_window_thread(); + if (err) { + pr_err("create uvb poll thread did failed, err=%d\n", err); + free_uvb_window_lock(); + return err; + } + + return 0; +} + +int init_global_vars(void) +{ + io_param_sync = kzalloc(sizeof(struct cis_message), GFP_KERNEL); + if (!io_param_sync) + return -ENOMEM; + + return 0; +} + +int init_cis_table(void) +{ + if (!g_cis_info) { + pr_err("failed to get cis info from odf\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +void free_global_vars(void) +{ + kfree(io_param_sync); + io_param_sync = NULL; +} + +void uninit_uvb(void) +{ + uvb_poll_window_thread_stop(); + msleep(1000); + free_uvb_window_lock(); +} + +static int __init cis_init(void) +{ + int err = 0; + + err = init_cis_table(); + if (err) { + pr_err("cis info init failed, err=%d\n", err); + return err; + } + + err = init_global_vars(); + if (err) { + pr_err("global vars malloc failed, err=%d\n", err); + return err; + } + + err = init_uvb(); + if (err) { + pr_err("uvb init failed, err=%d\n", err); + free_global_vars(); + return err; + } + + pr_info("cis init success\n"); + + return 0; +} + +static void __exit cis_exit(void) +{ + uninit_uvb(); + free_global_vars(); + pr_info("cis exit success\n"); +} + +module_init(cis_init); +module_exit(cis_exit); + diff --git a/drivers/firmware/uvb/cis/cis_info_process.c b/drivers/firmware/uvb/cis/cis_info_process.c new file mode 100644 index 0000000000000000000000000000000000000000..d0811b2bd9fe59a999fcdac3b07bdf5adf51cfd5 --- /dev/null +++ 
b/drivers/firmware/uvb/cis/cis_info_process.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: Call ID Service (CIS) info processing module, handles CIS init, + * func register/lookup and group info retrieval. + * Author: zhangrui + * Create: 2025-04-18 + */ +#define pr_fmt(fmt) "[UVB]: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cis_info_process.h" +#include "uvb_info_process.h" + +static u32 uvb_poll_timeout = UVB_POLL_TIMEOUT; +module_param(uvb_poll_timeout, uint, 0644); +MODULE_PARM_DESC(uvb_poll_timeout, "set uvb poll timeout(ms), default 1200"); + +LIST_HEAD(g_local_cis_list); +DEFINE_SPINLOCK(cis_register_lock); +struct cis_message *io_param_sync; + +void ubios_prepare_output_data(struct cis_message *io_param, void *output, u32 *output_size) +{ + memcpy(output, io_param->output, *(io_param->p_output_size)); + *output_size = *(io_param->p_output_size); +} + +static bool is_call_id_supported(struct cis_group *group, u32 call_id) +{ + u32 i; + + for (i = 0; i < group->cis_count; i++) { + pr_debug("cia call_id: %08x\n", group->call_id[i]); + if (group->call_id[i] == call_id) + return true; + } + + return false; +} + +int get_cis_group_info(u32 call_id, u32 receiver_id, + u8 *usage, u8 *index, + u32 *exact_receiver_id, u32 *forwarder_id) +{ + u32 i; + + if (!g_cis_info) { + pr_err("can't get cis_info from odf\n"); + return -EOPNOTSUPP; + } + + for (i = 0; i < g_cis_info->group_count; i++) { + if (receiver_id != g_cis_info->groups[i]->owner_user_id && + receiver_id != ubios_get_user_type(g_cis_info->groups[i]->owner_user_id)) + continue; + if (is_call_id_supported(g_cis_info->groups[i], call_id)) { + *usage = g_cis_info->groups[i]->usage; + *index = g_cis_info->groups[i]->index; + *exact_receiver_id = g_cis_info->groups[i]->owner_user_id; + *forwarder_id = g_cis_info->groups[i]->forwarder_id; + return 0; + } + } + + if (ubios_get_user_type(receiver_id) == UBIOS_USER_ID_UB_DEVICE) { + *usage = g_cis_info->ub.usage; + *index = g_cis_info->ub.index; + *exact_receiver_id = receiver_id; + *forwarder_id = g_cis_info->ub.forwarder_id; + pr_info("refresh info, usage=%d, index=%d, forward_id=%08x\n", + *usage, *index, *forwarder_id); + return 0; + } + + pr_err("call id: %08x not supported\n", call_id); + + return -EOPNOTSUPP; +} + +/* +Search Call ID Service owned by this component, return the function. +*/ +struct cis_func_node *search_local_cis_func_node(u32 call_id, u32 receiver_id) +{ + struct cis_func_node *cis_node = NULL; + struct cis_func_node *tmp; + + rcu_read_lock(); + list_for_each_entry_rcu(tmp, &g_local_cis_list, link) { + if ((tmp->call_id == call_id) && (tmp->receiver_id == receiver_id)) { + cis_node = tmp; + break; + } + } + rcu_read_unlock(); + + return cis_node; +} + +/* +Search local Call ID Service Functon according Call ID, return the function. 
+*/ +msg_handler search_local_cis_func(u32 call_id, u32 receiver_id) +{ + struct cis_func_node *cis_node; + + cis_node = search_local_cis_func_node(call_id, receiver_id); + if (cis_node) + return cis_node->func; + + return NULL; +} + +static bool cis_call_for_me(u32 receiver_id) +{ + if ((receiver_id == UBIOS_USER_ID_ALL) || + (receiver_id == ubios_get_user_type(UBIOS_MY_USER_ID)) || + (receiver_id == UBIOS_MY_USER_ID)) { + return true; + } + + return false; +} + +static bool cis_call_for_local(u32 receiver_id) +{ + if ((ubios_get_user_type(receiver_id) == UBIOS_USER_ID_INTERGRATED_UB_DEVICE) || + (ubios_get_user_type(receiver_id) == UBIOS_USER_ID_INTERGRATED_PCIE_DEVICE)) { + return true; + } + + return false; +} + +static atomic_t *find_uvb_window_lock(u64 window_address) +{ + struct uvb_window_lock *entry; + + if (hash_empty(uvb_lock_table)) + return NULL; + + hash_for_each_possible(uvb_lock_table, entry, node, window_address) { + if (entry->window_address == window_address) + return &entry->lock; + } + + return NULL; +} + +static int try_obtain_uvb_window(u64 *wd_obtain, u32 sender_id) +{ + if (*wd_obtain == 0) { + *wd_obtain = sender_id; + return 1; + } + return 0; +} + +struct uvb_window_description *uvb_occupy_window(struct uvb *uvb, u32 sender_id, u64 *wd_obtain) +{ + struct uvb_window_description *wd = NULL; + ktime_t start; + ktime_t now; + atomic_t *lock; + s64 time_interval; + u32 i; + u32 round; + + i = 0; + round = 0; + start = ktime_get(); + while (1) { + if (i >= uvb->window_count) { + i = 0; + round++; + } + wd = &(uvb->wd[i]); + wd_obtain = memremap(wd->obtain, wd->size, MEMREMAP_WC); + if (!wd_obtain) { + pr_err("uvb window obtain map failed\n"); + return NULL; + } + lock = find_uvb_window_lock(wd->address); + if (!lock) { + pr_err("uvb window lock not found\n"); + goto free_resources; + } + + if (atomic_cmpxchg(lock, 0, 1) == 0 + && try_obtain_uvb_window(wd_obtain, sender_id)) { + atomic_set(lock, 0); + udelay(uvb->delay); + if (*wd_obtain == sender_id) { + now = ktime_get(); + time_interval = ktime_to_us(ktime_sub(now, start)); + pr_info("occupy uvb window successfully, elapsed time: %lldus\n", + time_interval); + return wd; + } + } + + now = ktime_get(); + time_interval = ktime_to_us(ktime_sub(now, start)); + if (round > 1 && time_interval > UVB_TIMEOUT_WINDOW_OBTAIN) { + pr_err("obtain window timeout, tried %u * %u = %u times\n", + round, (u32)(uvb->window_count), round * (u32)(uvb->window_count)); + goto free_resources; + } + i++; + memunmap(wd_obtain); + wd_obtain = NULL; + } + +free_resources: + memunmap(wd_obtain); + wd_obtain = NULL; + + return NULL; +} + +void uvb_free_wd_obtain(u64 *wd_obtain) +{ + if (!wd_obtain) + return; + *wd_obtain = 0; + memunmap(wd_obtain); +} + +int uvb_free_window(struct uvb_window *window) +{ + if (!window) + return 0; + window->input_data_address = 0; + window->input_data_size = 0; + window->input_data_checksum = 0; + + window->output_data_address = 0; + window->output_data_size = 0; + window->output_data_checksum = 0; + window->returned_status = 0; + window->message_id = 0; + + dsb(sy); + isb(); + + window->receiver_id = 0; + window->sender_id = 0; + + return 0; +} + +static int fill_uvb_window_with_buffer(struct uvb_window_description *wd, + struct uvb_window *window_address, + struct cis_message *io_params, + void *input, u32 input_size, + void *output, u32 *output_size) +{ + struct uvb_window *window; + void *new_input = NULL; + void *new_output = NULL; + + window = window_address; + if (output_size) { + if (wd->size < 
(u64)*output_size + (u64)input_size) { + pr_err("check wd size failed for output size\n"); + return -EOVERFLOW; + } + window->output_data_size = *output_size; + } else { + window->output_data_size = UVB_OUTPUT_SIZE_NULL; + } + + if (input) { + if (wd->size < input_size) { + pr_err("check wd size failed for input size\n"); + return -EOVERFLOW; + } + new_input = memremap(wd->buffer, wd->size, MEMREMAP_WC); + if (!new_input) { + pr_err("memremap for wd_buffer_virt_addr failed\n"); + return -ENOMEM; + } + memcpy(new_input, input, input_size); + window->input_data_checksum = checksum32(input, input_size); + } + + if (output) + new_output = (void *)(new_input + ALIGN(input_size, sizeof(u64))); + + io_params->input = new_input; + io_params->input_size = input_size; + io_params->output = new_output; + io_params->p_output_size = &(window->output_data_size); + + window->input_data_address = new_input ? wd->buffer : 0; + window->input_data_size = input_size; + window->output_data_address = new_output ? wd->buffer + ALIGN(input_size, sizeof(u64)) : 0; + + return 0; +} + +int uvb_fill_window(struct uvb_window_description *wd, struct uvb_window *wd_addr, + struct cis_message *io_params, struct udfi_para *para) +{ + int err; + struct uvb_window *window; + + window = wd_addr; + window->message_id = para->message_id; + window->sender_id = para->sender_id; + + err = fill_uvb_window_with_buffer(wd, window, io_params, para->input, + para->input_size, para->output, para->output_size); + if (err) { + pr_err("fill uvb window with buffer failed\n"); + uvb_free_window(window); + return err; + } + + window->receiver_id = para->receiver_id; + window->forwarder_id = para->forwarder_id; + pr_info("uvb fill window success\n"); + + return 0; +} + +int uvb_poll_window_call(struct uvb_window *window, u32 call_id) +{ + ktime_t start; + ktime_t now; + s64 time_interval; + + start = ktime_get(); + while (1) { + if (window->message_id == ~call_id) + return (int)window->returned_status; + + now = ktime_get(); + time_interval = ktime_to_ms(ktime_sub(now, start)); + if (time_interval > uvb_poll_timeout) + break; + } + + pr_err("uvb poll window call timeout,wait=%lld ms\n", time_interval); + + return -ETIMEDOUT; +} + +int uvb_poll_window_call_sync(struct uvb_window *window, u32 call_id) +{ + int i; + + pr_info("start uvb window polling\n"); + for (i = 0; i < uvb_poll_timeout * 10; i++) { + if (window->message_id == ~call_id) + return (int)window->returned_status; + + udelay(UVB_POLL_TIME_INTERVAL); + } + + pr_err("uvb poll window call sync timeout\n"); + + return -ETIMEDOUT; +} + +int uvb_get_output_data(struct uvb_window *window, + struct cis_message *io_param, void *output, u32 *output_size) +{ + if (!output || !output_size) + return 0; + + if (*output_size == 0) + return 0; + + if (window->output_data_address == 0 || window->output_data_size == UVB_OUTPUT_SIZE_NULL) + return 0; + + if (window->output_data_checksum != + checksum32(io_param->output, window->output_data_size)) { + pr_warn("returned data checksum error\n"); + return -EINVAL; + } + ubios_prepare_output_data(io_param, output, output_size); + + return 0; +} + +void free_io_param_with_buffer(struct cis_message *io_param) +{ + if (!io_param) + return; + + if (io_param->input) + memunmap(io_param->input); + kfree(io_param); +} + +int cis_call_uvb(u8 index, struct udfi_para *para) +{ + int err; + struct uvb_window *window = NULL; + struct uvb_window_description *wd = NULL; + struct cis_message *io_param = NULL; + u64 *wd_obtain = NULL; + + if (!g_uvb_info) { + 
pr_err("uvb unsupported\n"); + return -EOPNOTSUPP; + } + + if (index >= g_uvb_info->uvb_count) { + pr_err("cis call uvb index exceed uvb count\n"); + return -EOVERFLOW; + } + + wd = uvb_occupy_window(g_uvb_info->uvbs[index], para->sender_id, wd_obtain); + if (!wd) { + pr_err("obtain window failed\n"); + return -EBUSY; + } + + if (!wd->buffer) { + pr_err("no window buffer to save data\n"); + goto free_obtain; + } + + io_param = kzalloc(sizeof(struct cis_message), GFP_KERNEL); + if (!io_param) { + err = -ENOMEM; + goto free_obtain; + } + + window = (struct uvb_window *)memremap(wd->address, sizeof(struct uvb_window), MEMREMAP_WC); + if (!window) { + pr_err("memremap uvb window failed\n"); + err = -ENOMEM; + goto free_io_param; + } + + err = uvb_fill_window(wd, window, io_param, para); + if (err) { + pr_err("fill uvb window failed\n"); + goto unmap_window; + } + + err = uvb_poll_window_call(window, para->message_id); + if (err) { + pr_err("call by uvb failed\n"); + goto free_window; + } + + err = uvb_get_output_data(window, io_param, para->output, para->output_size); + if (err) + pr_err("uvb get output data failed\n"); + +free_window: + uvb_free_window(window); +unmap_window: + memunmap(window); +free_io_param: + free_io_param_with_buffer(io_param); +free_obtain: + uvb_free_wd_obtain(wd_obtain); + pr_info("finish cis call by uvb\n"); + + return err; +} + +int cis_call_uvb_sync(u8 index, struct udfi_para *para) +{ + int err; + struct uvb_window *window = NULL; + struct uvb_window_description *wd = NULL; + u64 *wd_obtain = NULL; + + memset(io_param_sync, 0, sizeof(struct cis_message)); + + if (!g_uvb_info) { + pr_err("sync call uvb unsupported\n"); + return -EOPNOTSUPP; + } + + if (index >= g_uvb_info->uvb_count) { + pr_err("sync call use uvb index exceed\n"); + return -EOVERFLOW; + } + + wd = uvb_occupy_window(g_uvb_info->uvbs[index], para->sender_id, wd_obtain); + if (!wd) { + pr_err("sync call obtain window failed\n"); + return -EBUSY; + } + + if (!wd->buffer) { + pr_err("sync call no window buffer to save data\n"); + goto free_obtain; + } + + window = (struct uvb_window *)memremap(wd->address, sizeof(struct uvb_window), MEMREMAP_WC); + if (!window) { + pr_err("sync call memremap window failed\n"); + err = -ENOMEM; + goto free_obtain; + } + + err = uvb_fill_window(wd, window, io_param_sync, para); + if (err) { + pr_err("sync call fill uvb window failed\n"); + goto unmap_window; + } + + err = uvb_poll_window_call_sync(window, para->message_id); + if (err) { + pr_err("sync call by uvb failed\n"); + goto free_window; + } + + err = uvb_get_output_data(window, io_param_sync, para->output, para->output_size); + if (err) + pr_err("sync call uvb get output data failed\n"); + +free_window: + uvb_free_window(window); + if (io_param_sync->input) + memunmap(io_param_sync->input); +unmap_window: + memunmap(window); +free_obtain: + uvb_free_wd_obtain(wd_obtain); + pr_info("finish cis sync call by uvb\n"); + + return err; +} + +int cis_call_remote(u32 call_id, u32 sender_id, u32 receiver_id, + struct cis_message *msg, + bool is_sync) +{ + u32 forwarder_id; + u32 exact_receiver_id; + u8 usage; + u8 index; + int res; + struct udfi_para para = { 0 }; + + res = get_cis_group_info(call_id, receiver_id, + &usage, &index, &exact_receiver_id, &forwarder_id); + if (res) { + pr_err("can't get group info, call id=%08x, receiver id=%08x\n", + call_id, receiver_id); + return -EOPNOTSUPP; + } + + para.input = msg->input; + para.input_size = msg->input_size; + para.output = msg->output; + para.output_size = 
msg->p_output_size;
+	para.message_id = call_id;
+	para.receiver_id = exact_receiver_id;
+	para.sender_id = sender_id;
+	para.forwarder_id = forwarder_id;
+
+	if (usage != CIS_USAGE_UVB) {
+		pr_err("method not supported, call id=%08x, receiver id=%08x, usage=%d\n",
+			call_id, receiver_id, usage);
+		return -EOPNOTSUPP;
+	}
+
+	if (is_sync)
+		return cis_call_uvb_sync(index, &para);
+
+	return cis_call_uvb(index, &para);
+}
+
+static bool check_msg_valid(struct cis_message *msg)
+{
+	if (!msg)
+		return false;
+
+	if (msg->input && !msg->input_size)
+		return false;
+
+	if (!msg->input && msg->input_size)
+		return false;
+
+	if (msg->output && (!msg->p_output_size || !*msg->p_output_size))
+		return false;
+
+	if (!msg->output && msg->p_output_size && *msg->p_output_size)
+		return false;
+
+	return true;
+}
+
+/**
+ * cis_call_by_uvb - Trigger a cis call with given arguments.
+ *
+ * @call_id: call id that identifies which cis call will be triggered.
+ * @sender_id: user id of sender.
+ * @receiver_id: user id of receiver.
+ * @msg: the data that the user needs to transmit.
+ * @is_sync: whether to use a synchronous interface.
+ *
+ * Search for cia (call id attribute) in cis info with given call id and receiver id.
+ * The `usage` property of cia determines which method to use (uvb/arch call).
+ * Return 0 if the cis call succeeds, otherwise a negative error code
+ * (-EOPNOTSUPP if the communication method is not supported).
+ */
+int cis_call_by_uvb(u32 call_id, u32 sender_id, u32 receiver_id,
+		struct cis_message *msg, bool is_sync)
+{
+	int ret;
+	msg_handler func;
+
+	pr_debug("cis call: call id %08x, sender id %08x, receiver id %08x\n",
+		call_id, sender_id, receiver_id);
+
+	if (!sender_id || !receiver_id) {
+		pr_err("sender_id or receiver_id can't be 0\n");
+		return -EINVAL;
+	}
+
+	if (!check_msg_valid(msg)) {
+		pr_err("cis message is invalid\n");
+		return -EINVAL;
+	}
+
+	if (cis_call_for_me(receiver_id) || cis_call_for_local(receiver_id)) {
+		func = search_local_cis_func(call_id, receiver_id);
+		if (func) {
+			ret = func(msg);
+			if (ret) {
+				pr_err("cis call execute registered cis func failed\n");
+				return ret;
+			}
+			pr_info("cis call execute registered cis func success\n");
+			return 0;
+		}
+		pr_err("can't find cis func for callid=%08x, receiver_id=%08x\n",
+			call_id, receiver_id);
+		return -EOPNOTSUPP;
+	}
+
+	return cis_call_remote(call_id, sender_id, receiver_id, msg, is_sync);
+}
+EXPORT_SYMBOL(cis_call_by_uvb);
+
+/*
+Register a Call ID Service
+@call_id - UBIOS Interface ID
+@receiver_id - UBIOS User ID who owns this CIS
+@func - Callback function of Call ID
+*/
+int register_local_cis_func(u32 call_id, u32 receiver_id, msg_handler func)
+{
+	struct cis_func_node *p;
+	unsigned long flags;
+
+	if (UBIOS_GET_MESSAGE_FLAG(call_id) != UBIOS_CALL_ID_FLAG) {
+		pr_err("register is not uvb call\n");
+		return -EINVAL;
+	}
+	if (!func) {
+		pr_err("register func is NULL\n");
+		return -EINVAL;
+	}
+
+	/* check whether this Call ID already has a function */
+	if (search_local_cis_func_node(call_id, receiver_id)) {
+		pr_err("cis register: call_id[%08x], receiver_id[%08x], func already registered\n",
+			call_id, receiver_id);
+		return -EINVAL;
+	}
+
+	p = kcalloc(1, sizeof(struct cis_func_node), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	p->call_id = call_id;
+	p->receiver_id = receiver_id;
+	p->func = func;
+
+	spin_lock_irqsave(&cis_register_lock, flags);
+	list_add_tail_rcu(&p->link, &g_local_cis_list);
+	spin_unlock_irqrestore(&cis_register_lock, flags);
+	pr_info("register cis func success\n");
+
+	return 0;
+}
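
Editorial aside, not part of the patch: a minimal sketch of how a component might use the CIS entry points introduced here (register_local_cis_func(), unregister_local_cis_func(), cis_call_by_uvb()). The call id, receiver id and request payload are hypothetical placeholders, UBIOS_MY_USER_ID is assumed to come from the CIS headers in this patch, and a real call id must carry the UBIOS call-id flag that register_local_cis_func() checks.

/*
 * Usage sketch only; IDs and the header providing the prototypes are
 * assumptions, not values defined by this patch.
 */
#include <linux/module.h>
#include "cis_uvb_interface.h"	/* assumed to provide struct cis_message, msg_handler */

#define EXAMPLE_CALL_ID		0x01000001	/* hypothetical call id */
#define EXAMPLE_RECEIVER_ID	0x00010001	/* hypothetical UBIOS user id */

/* Invoked by the CIS core when EXAMPLE_CALL_ID is dispatched locally. */
static int example_cis_handler(struct cis_message *msg)
{
	pr_info("cis example: received %u input bytes\n", msg->input_size);
	return 0;
}

static int __init cis_example_init(void)
{
	struct cis_message msg = { 0 };
	char req[] = "ping";
	int err;

	err = register_local_cis_func(EXAMPLE_CALL_ID, EXAMPLE_RECEIVER_ID,
				      example_cis_handler);
	if (err)
		return err;

	/* Route a request: local receivers are handled in place, others go over UVB. */
	msg.input = req;
	msg.input_size = sizeof(req);
	err = cis_call_by_uvb(EXAMPLE_CALL_ID, UBIOS_MY_USER_ID,
			      EXAMPLE_RECEIVER_ID, &msg, false);
	if (err)
		pr_err("cis example: call failed, err=%d\n", err);

	return 0;
}

static void __exit cis_example_exit(void)
{
	unregister_local_cis_func(EXAMPLE_CALL_ID, EXAMPLE_RECEIVER_ID);
}

module_init(cis_example_init);
module_exit(cis_example_exit);
MODULE_LICENSE("GPL");
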
+EXPORT_SYMBOL(register_local_cis_func); + +/* +Unregister a Call ID Service +@call_id - UBIOS Interface ID +@receiver_id - UBIOS User ID who own this CIS +*/ +int unregister_local_cis_func(u32 call_id, u32 receiver_id) +{ + struct cis_func_node *p; + unsigned long flags; + + if (UBIOS_GET_MESSAGE_FLAG(call_id) != UBIOS_CALL_ID_FLAG) { + pr_err("unregister is not uvb call\n"); + return -EINVAL; + } + + p = search_local_cis_func_node(call_id, receiver_id); + if (!p) { + pr_err("cis unregister: call_id[%08x], receiver_id[%08x] not find func node.\n", + call_id, receiver_id); + return -EINVAL; + } + + spin_lock_irqsave(&cis_register_lock, flags); + list_del_rcu(&p->link); + spin_unlock_irqrestore(&cis_register_lock, flags); + synchronize_rcu(); + + kfree(p); + pr_info("unregister cis func success\n"); + + return 0; +} +EXPORT_SYMBOL(unregister_local_cis_func); + diff --git a/drivers/firmware/uvb/cis/cis_info_process.h b/drivers/firmware/uvb/cis/cis_info_process.h new file mode 100644 index 0000000000000000000000000000000000000000..4713e5306d7714ed7aa0e585a9fb666d97af77ec --- /dev/null +++ b/drivers/firmware/uvb/cis/cis_info_process.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: cis info process header + * Author: zhangrui + * Create: 2025-04-18 + */ + +#ifndef CIS_INFO_PROCESS_H +#define CIS_INFO_PROCESS_H + +#include "cis_uvb_interface.h" + +#define CIS_USAGE_UVB 2 +#define MAX_UVB_LOCK_IN_BITS 8 +#define UVB_POLL_TIME_INTERVAL (100) /* 100us */ +#define UVB_POLL_TIMEOUT (1200) /* 1200ms */ +#define UVB_TIMEOUT_WINDOW_OBTAIN (10000) /* 10000us */ +#define UVB_POLL_TIMEOUT_TIMES (10000) /* 10000 times */ + +extern struct cis_message *io_param_sync; +extern struct list_head g_local_cis_list; +extern DECLARE_HASHTABLE(uvb_lock_table, MAX_UVB_LOCK_IN_BITS); + +struct udfi_para { + u32 message_id; + u32 sender_id; + u32 receiver_id; + u32 forwarder_id; + void *input; + u32 input_size; + void *output; + u32 *output_size; +}; + +struct cis_func_node { + struct list_head link; + u32 call_id; + u32 receiver_id; + msg_handler func; +}; + +struct uvb_window_lock { + atomic_t lock; + u64 window_address; + struct hlist_node node; +}; + +int cis_call_remote(u32 call_id, u32 sender_id, u32 receiver_id, + struct cis_message *msg, + bool is_sync); +msg_handler search_local_cis_func(u32 call_id, u32 receiver_id); + +static inline u32 ubios_get_user_type(u32 user_id) +{ + return user_id & UBIOS_USER_TYPE_MASK; +} +static inline u32 ubios_get_user_index(u32 user_id) +{ + return user_id & UBIOS_USER_INDEX_MASK; +} + +#endif diff --git a/drivers/firmware/uvb/cis/uvb_info_process.c b/drivers/firmware/uvb/cis/uvb_info_process.c new file mode 100644 index 0000000000000000000000000000000000000000..654ff02efabb45dca3b6ad3fdd9eb3336ac0d9da --- /dev/null +++ b/drivers/firmware/uvb/cis/uvb_info_process.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: UVB info processing module, handles init and window polling. + * Author: zhangrui + * Create: 2025-04-18 + */ +#define pr_fmt(fmt) "[UVB]: " fmt + +#include +#include +#include +#include +#include "cis_info_process.h" +#include "uvb_info_process.h" + +/** +Calculate checksum in 4bytes, if size not aligned with 4bytes, padding with 0. 
+*/ +u32 checksum32(const void *data, u32 size) +{ + u64 i; + u64 sum = 0; + u32 remainder = size % sizeof(u32); + u32 *p = (u32 *)data; + u32 restsize = size - remainder; + + if (!data) + return (u32)-1; + + for (i = 0; i < restsize; i += sizeof(u32)) { + sum += *p; + p++; + } + + switch (remainder) { + case 1: + sum += (*p) & 0x000000FF; + break; + case 2: + sum += (*p) & 0x0000FFFF; + break; + case 3: + sum += (*p) & 0x00FFFFFF; + break; + default: + break; + } + + return (u32)(sum); +} + +static bool is_address_exceed(void *buffer, u32 buffer_size, void *input_address, + u32 input_size, void *output_address, u32 *output_size) +{ + void *end_of_buffer = buffer + buffer_size; + + if (input_address) { + if ((input_address < buffer) || (input_address + input_size >= end_of_buffer)) { + pr_err("input address exceed.\n"); + return true; + } + } + + if (output_address && output_size) { + if ((output_address < buffer + input_size) + || (output_address + *output_size >= end_of_buffer)) { + pr_err("output address exceed.\n"); + return true; + } + } + + return false; +} + +static int uvb_get_input_data(struct uvb_window *window, void *buffer, u32 buffer_size, + struct cis_message *msg, void *virt_input, void *virt_output) +{ + msg->input_size = window->input_data_size; + if (window->output_data_size == UVB_OUTPUT_SIZE_NULL) + msg->p_output_size = NULL; + else + msg->p_output_size = &window->output_data_size; + + if (!buffer) { + msg->input = (void *)window->input_data_address; + msg->output = (void *)window->output_data_address; + } else { + msg->input = (window->input_data_address == 0 ? NULL : buffer); + msg->output = (window->output_data_address == 0 ? NULL : + ((u8 *)buffer + ALIGN(msg->input_size, sizeof(u64)))); + if (is_address_exceed(buffer, buffer_size, msg->input, msg->input_size, + msg->output, msg->p_output_size)) { + pr_err("address is exceed\n"); + return -EOVERFLOW; + } + } + if (msg->input && msg->input_size) { + virt_input = memremap((u64)msg->input, msg->input_size, MEMREMAP_WC); + if (!virt_input) { + pr_err("memremap for input failed\n"); + return -ENOMEM; + } + msg->input = virt_input; + } + if (msg->output && msg->p_output_size && *msg->p_output_size) { + virt_output = memremap((u64)msg->output, *msg->p_output_size, MEMREMAP_WC); + if (!virt_output) { + pr_err("memremap for output failed\n"); + return -ENOMEM; + } + msg->output = virt_output; + } + if (msg->input_size) { + if (window->input_data_checksum != checksum32(msg->input, msg->input_size)) { + pr_err("input data checksum error\n"); + return -EINVAL; + } + } + return 0; +} + +static void uvb_return_status(struct uvb_window *window, int status) +{ + window->returned_status = (u32)status; + window->message_id = ~window->message_id; +} + +bool search_local_receiver_id(u32 receiver_id) +{ + bool found = false; + struct cis_func_node *cis_node; + + rcu_read_lock(); + list_for_each_entry_rcu(cis_node, &g_local_cis_list, link) { + if (cis_node->receiver_id == receiver_id) { + found = true; + break; + } + } + rcu_read_unlock(); + + return found; +} + +static void uvb_polling_window(struct uvb_window_description *wd) +{ + int err = 0; + bool found; + u32 receiver_id, message_id; + struct uvb_window *window = NULL; + struct cis_message msg = { 0 }; + msg_handler func; + void *virt_addr_input = NULL; + void *virt_addr_output = NULL; + + window = (struct uvb_window *)memremap(wd->address, + sizeof(struct uvb_window), MEMREMAP_WC); + if (!window) { + pr_err("polling window failed to map window addr\n"); + return; + } + receiver_id 
= window->receiver_id; + message_id = window->message_id; + + if (window->receiver_id) { + pr_debug("UVB window address: %llx\n", wd->address); + pr_debug("Version = %08x\n", window->version); + pr_debug("Message ID = %08x\n", window->message_id); + pr_debug("Sender ID = %08x\n", window->sender_id); + pr_debug("Receiver ID = %08x\n", window->receiver_id); + pr_debug("Forwarder ID = %08x\n", window->forwarder_id); + pr_debug("Input Data Address = %llx\n", window->input_data_address); + pr_debug("Input Data Size = %08x\n", window->input_data_size); + pr_debug("Output Data Address = %llx\n", window->output_data_address); + pr_debug("Output Data Size = %08x\n", window->output_data_size); + pr_debug("Returned Status = %08x\n", window->returned_status); + pr_debug("Buffer = %llx, size = %08x\n", wd->buffer, wd->size); + } + + found = search_local_receiver_id(receiver_id); + if (found) { + pr_debug("polling window start for callid=%08x, receiverid=%08x\n", + message_id, receiver_id); + window->receiver_id = 0; + /* get input data and check */ + err = uvb_get_input_data(window, (void *)wd->buffer, wd->size, + &msg, virt_addr_input, virt_addr_output); + if (err) { + uvb_return_status(window, err); + goto free_resources; + } + func = search_local_cis_func(message_id, receiver_id); + if (func) { + err = func(&msg); + if (!err && msg.output && msg.p_output_size && *msg.p_output_size) + window->output_data_checksum = + checksum32(msg.output, *msg.p_output_size); + } else { + pr_err("polling window not found local cis func for callid=%08x, receiverid=%08x\n", + message_id, receiver_id); + err = -EOPNOTSUPP; + } + pr_info("polling window execute local cis func success\n"); + uvb_return_status(window, err); + goto free_resources; + /* need uvb to forward */ + } else if (window->forwarder_id == UBIOS_MY_USER_ID) { + pr_info("cis call forward start\n"); + window->forwarder_id = 0; + + err = uvb_get_input_data(window, (void *)wd->buffer, wd->size, + &msg, virt_addr_input, virt_addr_output); + if (err) { + uvb_return_status(window, err); + goto free_resources; + } + err = cis_call_remote(message_id, UBIOS_MY_USER_ID, receiver_id, &msg, false); + if (!err && msg.output && msg.p_output_size && *msg.p_output_size) + window->output_data_checksum = + checksum32(msg.output, *msg.p_output_size); + pr_info("cis call forward end\n"); + uvb_return_status(window, err); + goto free_resources; + } + +free_resources: + if (virt_addr_input) + memunmap(virt_addr_input); + + if (virt_addr_output) + memunmap(virt_addr_output); + + if (window) + memunmap(window); +} + +static int uvb_polling_window_sync(struct uvb_window_description *wd) +{ + int err = -EAGAIN; + bool found; + struct uvb_window *window = NULL; + u32 receiver_id, message_id; + struct cis_message msg; + msg_handler func; + void *virt_addr_input = NULL; + void *virt_addr_output = NULL; + + window = (struct uvb_window *)memremap(wd->address, + sizeof(struct uvb_window), MEMREMAP_WC); + if (!window) { + pr_err("polling window sync failed to map window addr\n"); + return -ENOMEM; + } + + receiver_id = window->receiver_id; + message_id = window->message_id; + + found = search_local_receiver_id(receiver_id); + if (found) { + pr_debug("polling window sync start for callid=%08x, receiverid=%08x\n", + message_id, receiver_id); + window->receiver_id = 0; + err = uvb_get_input_data(window, (void *)wd->buffer, wd->size, + &msg, virt_addr_input, virt_addr_output); + if (err) { + err = -EINVAL; + uvb_return_status(window, err); + goto free_resources; + } + func = 
search_local_cis_func(message_id, receiver_id); + if (func) { + err = func(&msg); + if (!err && msg.output && msg.p_output_size && *msg.p_output_size) + window->output_data_checksum = + checksum32(msg.output, *msg.p_output_size); + if (err) + err = -EPERM; + } else { + pr_err("polling window sync not found cis func for callid=%08x, receiverid=%08x\n", + message_id, receiver_id); + err = -EOPNOTSUPP; + } + pr_info("polling window sync execute local cis func success\n"); + uvb_return_status(window, err); + goto free_resources; + } + +free_resources: + if (virt_addr_input) + memunmap(virt_addr_input); + + if (virt_addr_output) + memunmap(virt_addr_output); + + if (window) + memunmap(window); + + return err; +} + +int uvb_poll_window(void *data) +{ + int i; + int j; + struct uvb *uvb; + + while (!kthread_should_stop()) { + for (i = 0; i < g_uvb_info->uvb_count; i++) { + uvb = g_uvb_info->uvbs[i]; + if (!uvb) + continue; + + if (uvb->window_count == 0) + continue; + + for (j = 0; j < uvb->window_count; j++) + uvb_polling_window(&uvb->wd[j]); + } + msleep(1); + } + + return 0; +} + +int uvb_polling_sync(void *data) +{ + int i; + int j; + int index; + int err; + struct uvb *uvb; + + for (index = 0; index < UVB_POLL_TIMEOUT_TIMES; index++) { + for (i = 0; i < g_uvb_info->uvb_count; i++) { + uvb = g_uvb_info->uvbs[i]; + if (!uvb) + continue; + + if (uvb->window_count == 0) + continue; + + for (j = 0; j < uvb->window_count; j++) { + err = uvb_polling_window_sync(&uvb->wd[j]); + if (err == -EAGAIN) + continue; + return err; + } + } + udelay(UVB_POLL_TIME_INTERVAL); + } + + pr_err("timeout occurred after 1s\n"); + + return -ETIMEDOUT; +} +EXPORT_SYMBOL(uvb_polling_sync); + + diff --git a/drivers/firmware/uvb/cis/uvb_info_process.h b/drivers/firmware/uvb/cis/uvb_info_process.h new file mode 100644 index 0000000000000000000000000000000000000000..7f7f0e7362dbf518714557a9cfb28d8c5a487df6 --- /dev/null +++ b/drivers/firmware/uvb/cis/uvb_info_process.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: uvb info process header + * Author: zhangrui + * Create: 2025-04-18 + */ + +#ifndef UVB_INFO_PROCESS_H +#define UVB_INFO_PROCESS_H + +int uvb_poll_window(void *data); +u32 checksum32(const void *data, u32 size); +#endif diff --git a/drivers/firmware/uvb/include/cis_uvb_interface.h b/drivers/firmware/uvb/include/cis_uvb_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..513cb42372da9a2b28b92e65db2b470cd3423b3c --- /dev/null +++ b/drivers/firmware/uvb/include/cis_uvb_interface.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
+ * Description: cis uvb interface header + * Author: zhangrui + * Create: 2025-04-18 + */ + +#ifndef CIS_UVB_INTERFACE_H +#define CIS_UVB_INTERFACE_H +#include + +/** + * struct cis_group - call id service group + * @owner_user_id: user id that indicates which component owns the cia[] array + * @cis_count: number of cia in the group + * @cia: array of call id attribute + * @forwarder_id forwarder id + */ +struct cis_group { + u32 owner_user_id; + u32 cis_count; + u8 usage; + u8 index; + u32 forwarder_id; + u32 call_id[]; +}; + +/** + * struct cis_ub - call id service ub struct + * @usage: usage for channel + * @index: index for uvb + * @forwarder_id forwarder id + */ +struct cis_ub { + u8 usage; + u8 index; + u32 forwarder_id; +}; + +/** + * struct cis_info - call id service information + * @group_count: number of cis group + * @groups: array of cis group + */ +struct cis_info { + u32 group_count; + u32 reserved; + struct cis_ub ub; + struct cis_group *groups[]; +}; + + +extern struct cis_info *g_cis_info; + +#define UVB_OUTPUT_SIZE_NULL 0xFFFFFFFF +#define UVB_WINDOW_COUNT_MAX 0xFF + +/** + * struct uvb_window + * @version: uvb window version + * @message_id: call id + * @sender_id: user id of caller + * @receiver_id: user id of callee + * @input_data_address: input data physical address + * @input_date_size: input data size + * @input_data_checksum: input data checksum, not used yet + * @output_data_address: output data physical address + * @output_data_size: output data size + * @output_data_checksum: output data checksum, not used yet + * @returned_status: UVB window index, if usage indicates UVB + */ +struct uvb_window { + u8 version; + u8 reserved1[3]; + u32 message_id; + u32 sender_id; + u32 receiver_id; + u64 input_data_address; + u32 input_data_size; + u32 input_data_checksum; + u64 output_data_address; + u32 output_data_size; + u32 output_data_checksum; + u32 returned_status; + u8 reserved2[8]; + u32 forwarder_id; +}; +struct uvb_window_description { + u64 obtain; /* This address is used to obtain this window */ + u64 address; /* The address of uvb window */ + u64 buffer; /* Buffer address of this window, 0 if no buffer */ + u32 size; /* The size of buffer, same for all windows in one uvb */ + u32 reserved; +}; + +struct uvb { + u8 window_count; + bool secure; + u16 delay; /* us */ + u32 reserved; + struct uvb_window_description wd[]; +}; + +struct uvb_info { + u8 uvb_count; + u8 reserved[7]; + struct uvb *uvbs[]; +}; + +extern struct uvb_info *g_uvb_info; + +#endif diff --git a/drivers/firmware/uvb/odf/odf_data.c b/drivers/firmware/uvb/odf/odf_data.c new file mode 100644 index 0000000000000000000000000000000000000000..af9383759121eb550764a84408fe32480873ca1d --- /dev/null +++ b/drivers/firmware/uvb/odf/odf_data.c @@ -0,0 +1,729 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: ODF data processing, handles ODF various odf data structures + * Author: zhangrui + * Create: 2025-04-18 + */ +#define pr_fmt(fmt) "[UVB]: " fmt + +#include +#include "odf_interface.h" +#include "odf_handle.h" +#include "cis_uvb_interface.h" + +/** +@brief Search and match one value name, return the pointer of value structrue if matched. +@param[in] start start address of the search. +@param[in] end end address of the search. +@param[in] name value name. +@param[out] vs used to return value structure. +@return returned status fo the call +@retval = 0, success. +@retval < 0, failed. 
+*/ +int odf_get_vs_by_name(u8 *start, u8 *end, char *name, struct ubios_od_value_struct *vs) +{ + struct ubios_od_value_struct temp; + + if (!start || !end || !name || !vs) + return -EINVAL; + + if (start >= end) + return -ENOENT; + + odf_get_vs_by_pointer(start, &temp); + + if (strcmp(name, temp.name) == 0) { + *vs = temp; + return 0; + } + + return odf_get_vs_by_name(temp.data + temp.data_length, end, name, vs); +} + +static void odf_vs_to_list(struct ubios_od_value_struct *vs, struct ubios_od_list_info *list) +{ + list->name = vs->name; + list->data_type = vs->type & ~UBIOS_OD_TYPE_LIST; + list->count = odf_read16(vs->data); + list->start = vs->data + sizeof(u16); + list->end = vs->data + vs->data_length; +} + +/** +Change value structure by index in a list, the name will not be changed, +Both change value pointer and length and type. +note: + index could be 0, that means get the first one in list. +*/ +static int odf_change_vs_in_list(struct ubios_od_value_struct *vs, u16 index) +{ + struct ubios_od_list_info list; + + odf_vs_to_list(vs, &list); + + return odf_get_data_from_list(&list, index, vs); +} + +/** +Change the value structure with index, move the pointer to the data indicated by index, +and update length. +Note: +Only list support index in path, other type will return not support if index != 0. +*/ +static int odf_change_vs_by_index(struct ubios_od_value_struct *vs, u16 index) +{ + if ((vs->type & UBIOS_OD_TYPE_LIST) == UBIOS_OD_TYPE_LIST) + return odf_change_vs_in_list(vs, index); + + if (index > 0) + return -EOPNOTSUPP; + else + return 0; +} + +/** +Search one od file, input value path, output the value structure, contains value info +*/ +static int odf_get_vs_from_file(u8 *file, char *path, struct ubios_od_value_struct *vs) +{ + int status; + u16 index; + char name[UBIOS_OD_NAME_LEN_MAX]; + struct ubios_od_header *header = (struct ubios_od_header *)file; + bool is_got_vs = false; + + if (!is_od_file_valid(file)) { + pr_err("odf: file[%llx] invalid\n", (u64)file); + return -EINVAL; + } + + /* start from the od file data */ + vs->data = (u8 *)(header + 1); + vs->data_length = header->total_size - header->remaining_size - + sizeof(struct ubios_od_header); + while (odf_separate_name(&path, name, UBIOS_OD_NAME_LEN_MAX, &index) == 0) { + status = odf_get_vs_by_name(vs->data, vs->data + vs->data_length, name, vs); + if (status) { + pr_err("odf: can not find name[%s]'s value\n", name); + return status; + } + is_got_vs = true; + if (index != UBIOS_OD_INVALID_INDEX) { + status = odf_change_vs_by_index(vs, index); + if (status) { + pr_err("odf: get value by index failed, name[%s], type[%#x], index[%#x]\n", + name, vs->type, index); + return status; + } + } + } + if ((is_got_vs) && !path) + return 0; + + pr_err("odf: failed, left path[%s]\n", path); + + return -EOPNOTSUPP; +} + +/** +Search all od file in the root, input value path, output the value structure, contains value info. +If file is not NULL, also return od file, could used to update info of od file, such as checksum. 
+*/ +static int odf_get_vs_from_root(struct ubios_od_root *root, char *path, + u8 **file, struct ubios_od_value_struct *vs) +{ + int status; + char name[UBIOS_OD_NAME_LEN_MAX]; + u8 *od_file = NULL; + + status = odf_separate_name(&path, name, UBIOS_OD_NAME_LEN_MAX, NULL); + if (status) { + pr_err("odf: get od file name failed, %d\n", status); + return status; + } + + od_file = odf_get_od_file(root, name); + if (!od_file) { + pr_err("odf: can not find od file[%s]\n", name); + return -ENOENT; + } + + if (file) + *file = od_file; + + return odf_get_vs_from_file(od_file, path, vs); +} + +static bool is_root_and_path_valid(struct ubios_od_root *root, char *path) +{ + if (!is_od_root_valid(root)) + return false; + + if (!path) { + pr_err("odf: path is NULL\n"); + return false; + } + + return true; +} + + +/** +@brief Get table information like row, colomn, sub types, .etc. +@param[in] vs value structure +@param[out] table_info used to return table info. +@return returned status fo the call +@retval = 0, success. +@retval < 0, failed. +*/ +int odf_vs_to_table(struct ubios_od_value_struct *vs, struct ubios_od_table_info *table_info) +{ + u64 i; + u8 type; + u8 *p = vs->data; + + table_info->table_name = vs->name; + table_info->length_per_row = 0; + table_info->row = odf_read16(p); + p += sizeof(u16); + table_info->col = odf_read8(p); + p += sizeof(u8); + table_info->sub_name_start = (char *)p; + + for (i = 0; i < table_info->col; i++) { + p += strlen((char *)p) + 1; + type = odf_read8(p); + p++; + switch (type) { + case UBIOS_OD_TYPE_U8: + case UBIOS_OD_TYPE_S8: + case UBIOS_OD_TYPE_BOOL: + case UBIOS_OD_TYPE_CHAR: + table_info->length_per_row += sizeof(u8); + break; + case UBIOS_OD_TYPE_U16: + case UBIOS_OD_TYPE_S16: + table_info->length_per_row += sizeof(u16); + break; + case UBIOS_OD_TYPE_U32: + case UBIOS_OD_TYPE_S32: + table_info->length_per_row += sizeof(u32); + break; + case UBIOS_OD_TYPE_U64: + case UBIOS_OD_TYPE_S64: + table_info->length_per_row += sizeof(u64); + break; + default: + pr_err("odf: get table[%s] info, invalid type[%d] of column[%llu]\n", + table_info->table_name, type, i); + return -EOPNOTSUPP; + } + } + table_info->value_start = p; + table_info->table_end = table_info->value_start + + table_info->length_per_row * table_info->row; + + return 0; +} + +/** +@brief Get a value's offset in row of table, will check type first. +@param[in] table table info get from function OdfGetTable +@param[in] name name of data in table wanted to get. +@param[in] type data type. +@param[out] offset used to return offset in the row. +@return returned status fo the call +@retval = 0, success. +@retval < 0, failed. 
+*/ +int odf_get_offset_in_table(const struct ubios_od_table_info *table, + char *name, u8 type, u32 *offset) +{ + u64 i; + u8 data_type; + u32 temp_offset = 0; + char *sub_name = NULL; + + if (!table || !name || !offset) + return -EINVAL; + + /* fisrt sub name */ + sub_name = table->sub_name_start; + for (i = 0; i < table->col; i++) { + data_type = odf_read8((u8 *)sub_name + strlen(sub_name) + 1); + if (strcmp(name, sub_name) == 0) + break; + sub_name += strlen(sub_name) + 1 + sizeof(data_type); + switch (data_type) { + case UBIOS_OD_TYPE_U8: + case UBIOS_OD_TYPE_S8: + case UBIOS_OD_TYPE_BOOL: + case UBIOS_OD_TYPE_CHAR: + temp_offset += sizeof(u8); + break; + case UBIOS_OD_TYPE_U16: + case UBIOS_OD_TYPE_S16: + temp_offset += sizeof(u16); + break; + case UBIOS_OD_TYPE_U32: + case UBIOS_OD_TYPE_S32: + temp_offset += sizeof(u32); + break; + case UBIOS_OD_TYPE_U64: + case UBIOS_OD_TYPE_S64: + temp_offset += sizeof(u64); + break; + default: + pr_err("odf: get table info, invalid type[%d] of column[%llu]\n", + data_type, i); + return -EOPNOTSUPP; + } + } + if (i == table->col) + return -ENOENT; + + if (type != data_type) + return -EFAULT; + + *offset = temp_offset; + + return 0; +} + +/** +@brief Get a value pointer from table according name and row, will check type first. +@param[in] table table info get from function OdfGetTable +@param[in] name name of data in table wanted to get. +@param[in] row the row of table wanted to get. +@param[in] type data type. +@param[out] data used to return data pointer. +@return returned status fo the call +@retval = 0, success. +@retval < 0, failed. +*/ +int odf_get_data_from_table(const struct ubios_od_table_info *table, + u16 row, char *name, u8 type, void *value) +{ + int status; + u32 offset; + u8 *p; + + if (!table || !name || !value) + return -EINVAL; + + if (row >= table->row) + return -EOVERFLOW; + + status = odf_get_offset_in_table(table, name, type, &offset); + if (status) + return status; + + p = table->value_start + table->length_per_row * row + offset; + switch (type) { + case UBIOS_OD_TYPE_U8: + case UBIOS_OD_TYPE_BOOL: + case UBIOS_OD_TYPE_CHAR: + *(u8 *)value = odf_read8(p); + break; + case UBIOS_OD_TYPE_S8: + *(s8 *)value = (s8)odf_read8(p); + break; + case UBIOS_OD_TYPE_U16: + *(u16 *)value = odf_read16(p); + break; + case UBIOS_OD_TYPE_S16: + *(s16 *)value = (s16)odf_read16(p); + break; + case UBIOS_OD_TYPE_U32: + *(u32 *)value = odf_read32(p); + break; + case UBIOS_OD_TYPE_S32: + *(s32 *)value = (s32)odf_read32(p); + break; + case UBIOS_OD_TYPE_U64: + *(u64 *)value = odf_read64(p); + break; + case UBIOS_OD_TYPE_S64: + *(s64 *)value = (s64)odf_read64(p); + break; + default: + pr_err("odf: get table data failed, invalid type[%#x]\n", type); + return -EOPNOTSUPP; + } + + return status; +} + +int odf_get_u8_from_table(const struct ubios_od_table_info *table, + u16 row, char *name, u8 *value) +{ + return odf_get_data_from_table(table, row, name, UBIOS_OD_TYPE_U8, value); +} + +int odf_get_u32_from_table(const struct ubios_od_table_info *table, + u16 row, char *name, u32 *value) +{ + return odf_get_data_from_table(table, row, name, UBIOS_OD_TYPE_U32, value); +} + +int odf_get_u64_from_table(const struct ubios_od_table_info *table, + u16 row, char *name, u64 *value) +{ + return odf_get_data_from_table(table, row, name, UBIOS_OD_TYPE_U64, value); +} + +int odf_get_vs_from_table(u8 *table, char *path, struct ubios_od_value_struct *vs) +{ + if (!table || !vs || !path) + return -EINVAL; + + return odf_get_vs_from_file(table, path, vs); +} + +int 
odf_get_list_from_table(u8 *table, char *path, struct ubios_od_list_info *list) +{ + int status; + struct ubios_od_value_struct vs; + + if (!table || !list) + return -EINVAL; + + status = odf_get_vs_from_table(table, path, &vs); + if (status) + return status; + + if ((vs.type & UBIOS_OD_TYPE_LIST) != UBIOS_OD_TYPE_LIST) { + pr_err("odf:the type[%#x] is not a list\n", vs.type); + return -EFAULT; + } + + odf_vs_to_list(&vs, list); + + return 0; +} + +/** +@brief Get a ubios od value struct from od root according to the path +@param[in] root root pointer of od +@param[in] path full path to search, if not include index of table. +@param[out] vs used to return a ubios od value struct. +@return returned status fo the call +@retval = 0, get ubios od value struct success, saved in parameter vs. +@retval < 0, get ubios od value struct failed. +*/ +int odf_get_struct(struct ubios_od_root *root, char *path, struct ubios_od_value_struct *vs) +{ + int status; + + if (!is_root_and_path_valid(root, path)) + return -EINVAL; + + status = odf_get_vs_from_root(root, path, NULL, vs); + + return status; +} + +/** +@brief Get a list from od root, will return a list info structure. +@param[in] root root pointer of od +@param[in] path full path to search, if not include index of table. +@param[out] list used to return a list info structure. +@return returned status fo the call +@retval = 0, success. +@retval < 0, failed. +*/ +int odf_get_list(struct ubios_od_root *root, char *path, struct ubios_od_list_info *list) +{ + int status; + struct ubios_od_value_struct vs; + + if (!is_root_and_path_valid(root, path) || !list) + return -EINVAL; + + status = odf_get_vs_from_root(root, path, NULL, &vs); + if (status) + return status; + + if ((vs.type & UBIOS_OD_TYPE_LIST) != UBIOS_OD_TYPE_LIST) { + pr_err("the type[%#x] is not a list\n", vs.type); + return -EFAULT; + } + + odf_vs_to_list(&vs, list); + + return 0; +} + +int odf_get_u32_from_list(const struct ubios_od_list_info *list, u16 index, u32 *value) +{ + if (!value) + return -EINVAL; + + if (list->data_type != UBIOS_OD_TYPE_U32) + return -EFAULT; + + *value = odf_read32(list->start + sizeof(u32) * index); + + return 0; +} + +/** +@brief Get a value structure from list by index. +@param[in] list list get by function OdfGetList +@param[in] index index in list to get. +@param[out] vs used to return a value structrue +@return returned status fo the call +@retval = 0, success. +@retval < 0, failed. +@note: + Usually this function is useful when the data type in list is struct, get value structure, + then use OdfGetVsByName to search inside. 
+*/ +int odf_get_data_from_list(const struct ubios_od_list_info *list, + u16 index, struct ubios_od_value_struct *vs) +{ + u64 i; + u32 len; + u8 *p; + + if (!list || !vs) + return -EINVAL; + + if (index >= list->count) + return -EOVERFLOW; + + vs->name = list->name; + vs->type = list->data_type; + p = list->start; + switch (vs->type) { + case UBIOS_OD_TYPE_U8: + case UBIOS_OD_TYPE_S8: + case UBIOS_OD_TYPE_BOOL: + case UBIOS_OD_TYPE_CHAR: + vs->data = list->start + index * sizeof(u8); + vs->data_length = sizeof(u8); + break; + case UBIOS_OD_TYPE_U16: + case UBIOS_OD_TYPE_S16: + vs->data = list->start + index * sizeof(u16); + vs->data_length = sizeof(u16); + break; + case UBIOS_OD_TYPE_U32: + case UBIOS_OD_TYPE_S32: + vs->data = list->start + index * sizeof(u32); + vs->data_length = sizeof(u32); + break; + case UBIOS_OD_TYPE_U64: + case UBIOS_OD_TYPE_S64: + vs->data = list->start + index * sizeof(u64); + vs->data_length = sizeof(u64); + break; + case UBIOS_OD_TYPE_STRING: + for (i = 0; i < index; i++) + p += (strlen((char *)p) + 1); + vs->data = p; + vs->data_length = (u32)strlen((char *)p) + 1; + break; + case UBIOS_OD_TYPE_STRUCT: + for (i = 0; i < index; i++) { + len = odf_read32(p); + p += (sizeof(u32) + len); + } + vs->data = p + sizeof(u32); + vs->data_length = odf_read32(p); + break; + default: + pr_err("odf: invalid type[%#x], not support\n", vs->type); + return -EOPNOTSUPP; + } + + return 0; +} + +/** +@brief Get next value of a list. +@note: + The caller should ensure the input structure is a member of list, + this function can only check some of this. +*/ +int odf_next_in_list(const struct ubios_od_list_info *list, struct ubios_od_value_struct *vs) +{ + u8 *p; + + if (!vs) + return -EINVAL; + + if (list->data_type != vs->type || strcmp(list->name, vs->name)) + return -EFAULT; + + switch (vs->type) { + case UBIOS_OD_TYPE_U8: + case UBIOS_OD_TYPE_S8: + case UBIOS_OD_TYPE_BOOL: + case UBIOS_OD_TYPE_CHAR: + case UBIOS_OD_TYPE_U16: + case UBIOS_OD_TYPE_S16: + case UBIOS_OD_TYPE_U32: + case UBIOS_OD_TYPE_S32: + case UBIOS_OD_TYPE_U64: + case UBIOS_OD_TYPE_S64: + vs->data = vs->data + vs->data_length; + break; + case UBIOS_OD_TYPE_STRING: + vs->data = vs->data + vs->data_length; + vs->data_length = (u32)strlen((char *)vs->data) + 1; + break; + case UBIOS_OD_TYPE_STRUCT: + p = vs->data + vs->data_length; + vs->data_length = odf_read32(p); + vs->data = p + sizeof(u32); + break; + default: + pr_err("odf: invalid type[%#x], not support\n", vs->type); + return -EOPNOTSUPP; + } + if (vs->data >= list->end) + return -EOVERFLOW; + + return 0; +} + +/** +Internal function, get data pointer by path and type. 
+*/ +static int odf_get_data_and_check_type(const struct ubios_od_value_struct *vs, + char *name, u8 type, void **data) +{ + int status; + struct ubios_od_value_struct temp_vs; + + if (!vs || !name || !data) + return -EINVAL; + + status = odf_get_vs_by_name(vs->data, vs->data + vs->data_length, name, &temp_vs); + if (status) + return status; + + if (temp_vs.type != type) + return -EFAULT; + + *data = temp_vs.data; + + return 0; +} + +int odf_get_u8_from_struct(const struct ubios_od_value_struct *vs, char *name, u8 *value) +{ + int status; + u8 *data; + + if (!value) + return -EINVAL; + + status = odf_get_data_and_check_type(vs, name, UBIOS_OD_TYPE_U8, (void **)&data); + if (status) + return status; + + *value = odf_read8(data); + + return 0; +} + +int odf_get_u16_from_struct(const struct ubios_od_value_struct *vs, char *name, u16 *value) +{ + int status; + u8 *data; + + if (!value) + return -EINVAL; + + status = odf_get_data_and_check_type(vs, name, UBIOS_OD_TYPE_U16, (void **)&data); + if (status) + return status; + + *value = odf_read16(data); + + return 0; +} + +int odf_get_u32_from_struct(const struct ubios_od_value_struct *vs, char *name, u32 *value) +{ + int status; + u8 *data; + + if (!value) + return -EINVAL; + + status = odf_get_data_and_check_type(vs, name, UBIOS_OD_TYPE_U32, (void **)&data); + if (status) + return status; + + *value = odf_read32(data); + + return 0; +} + +int odf_get_bool_from_struct(const struct ubios_od_value_struct *vs, char *name, bool *value) +{ + int status; + u8 *data; + + if (!value) + return -EINVAL; + + status = odf_get_data_and_check_type(vs, name, UBIOS_OD_TYPE_BOOL, (void **)&data); + if (status) + return status; + + *value = odf_read8(data); + + return 0; +} + +/** +Get table in the value structure. +*/ +int odf_get_table_from_struct(const struct ubios_od_value_struct *vs, + char *name, struct ubios_od_table_info *table) +{ + int status; + struct ubios_od_value_struct temp_vs; + + if (!vs || !name || !table) + return -EINVAL; + + status = odf_get_vs_by_name(vs->data, vs->data + vs->data_length, name, &temp_vs); + if (status) + return status; + + if (temp_vs.type != UBIOS_OD_TYPE_TABLE) + return -EFAULT; + + return odf_vs_to_table(&temp_vs, table); +} + +int odf_get_list_from_struct(const struct ubios_od_value_struct *vs, + char *name, struct ubios_od_list_info *list) +{ + int status; + struct ubios_od_value_struct temp_vs; + + if (!vs || !name || !list) + return -EINVAL; + + status = odf_get_vs_by_name(vs->data, vs->data + vs->data_length, name, &temp_vs); + if (status) + return status; + + if ((temp_vs.type & UBIOS_OD_TYPE_LIST) != UBIOS_OD_TYPE_LIST) { + pr_err("the type[%#x] is not a list\n", temp_vs.type); + return -EFAULT; + } + + odf_vs_to_list(&temp_vs, list); + + return 0; +} + diff --git a/drivers/firmware/uvb/odf/odf_get_fdt.c b/drivers/firmware/uvb/odf/odf_get_fdt.c new file mode 100644 index 0000000000000000000000000000000000000000..b683955e657c510704bcb60f9a748ec8386f6648 --- /dev/null +++ b/drivers/firmware/uvb/odf/odf_get_fdt.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
+ * Description: ODF get fdt info + * Author: mengkanglai + * Create: 2025-04-18 + */ +#include +#include +#include +#include +#include + +int odf_get_fdt_ubiostbl(u64 *phys_addr, char *tbl) +{ + int node, len; + const void *prop; + + node = fdt_path_offset(initial_boot_params, "/chosen"); + if (node < 0) { + pr_err("failed to get device tree chosen node\n"); + return -EINVAL; + } + prop = fdt_getprop(initial_boot_params, node, tbl, &len); + if (!prop) { + pr_err("failed to get property\n"); + return -EINVAL; + } + *phys_addr = (len == 4) ? (u64)be32_to_cpup((const u32 *)prop) : + get_unaligned_be64(prop); + + return 0; +} +EXPORT_SYMBOL(odf_get_fdt_ubiostbl); diff --git a/drivers/firmware/uvb/odf/odf_handle.h b/drivers/firmware/uvb/odf/odf_handle.h new file mode 100644 index 0000000000000000000000000000000000000000..136f4c98364ef778c0bdafbfc2039c49e92d0142 --- /dev/null +++ b/drivers/firmware/uvb/odf/odf_handle.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: libodf handle header + * Author: zhangrui + * Create: 2025-04-18 + */ +#ifndef ODF_HANDLE_H +#define ODF_HANDLE_H +#include + +extern struct ubios_od_root *od_root; + +/** +@brief Search and match one value name, return the pointer of the value if matched. +@param[in] start start address of the search. +@param[in] end end address of the search. +@param[in] name value name. +@param[out] vs used to return value structure. +@return returned status of the call +@retval = 0, success. +@retval < 0, failed. +@note: + start must point to the name of the value. +*/ +int odf_get_vs_by_name(u8 *start, u8 *end, char *name, struct ubios_od_value_struct *vs); + +/** +@brief Get table information like row, column, sub types, etc. +@param[in] vs value structure +@param[out] table_info used to return table info. +@return returned status of the call +@retval = 0, success. +@retval < 0, failed. +*/ +int odf_vs_to_table(struct ubios_od_value_struct *vs, struct ubios_od_table_info *table_info); + +/** +@brief Get a value's offset in a row of the table, will check type first. +@param[in] table table info obtained from function OdfGetTable +@param[in] name name of the data to get from the table. +@param[in] type data type. +@param[out] offset used to return the offset in the row. +@return returned status of the call +@retval = 0, success. +@retval < 0, failed. +*/ +int odf_get_offset_in_table(const struct ubios_od_table_info *table, + char *name, u8 type, u32 *offset); + +/** +@brief Get a value from table according to name and row, will check type first. +@param[in] table table info obtained from function OdfGetTable +@param[in] row the row of the table to get. +@param[in] name name of the data to get from the table. +@param[in] type data type. +@param[out] value data pointer to store the returned value. +@return returned status of the call +@retval = 0, success. +@retval < 0, failed. +*/ +int odf_get_data_from_table(const struct ubios_od_table_info *table, + u16 row, char *name, u8 type, void *value); + +/** +@brief Get a value from table according to name and row, will check type first. +@param[in] table table info obtained from function OdfGetTable +@param[in] row the row of the table to get. +@param[in] name name of the data to get from the table. +@param[out] value used to return value. +@return returned status of the call +@retval = 0, success. +@retval < 0, failed.
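+@note Illustrative call pattern (the table handle wd, the row variable and the column name "address" are only examples; they mirror the uvb window table used elsewhere in this patch): + u64 address; + status = odf_get_u64_from_table(&wd, row, "address", &address);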
+*/ +int odf_get_u8_from_table(const struct ubios_od_table_info *table, + u16 row, char *name, u8 *value); +int odf_get_u32_from_table(const struct ubios_od_table_info *table, + u16 row, char *name, u32 *value); +int odf_get_u64_from_table(const struct ubios_od_table_info *table, + u16 row, char *name, u64 *value); + +/** +@brief Get a list from od root, will return a list info structure. +@param[in] root root pointer of od +@param[in] path full path to search, not including any table index. +@param[out] list used to return a list info structure. +@return returned status of the call +@retval = 0, success. +@retval < 0, failed. +*/ +int odf_get_list(struct ubios_od_root *root, char *path, struct ubios_od_list_info *list); + +int odf_get_struct(struct ubios_od_root *root, char *path, struct ubios_od_value_struct *vs); + +int odf_get_u32_from_list(const struct ubios_od_list_info *list, u16 index, u32 *value); + +/** +@brief Get a value structure from list by index. +@param[in] list list obtained from function OdfGetList +@param[in] index index in list to get. +@param[out] vs used to return a value structure +@return returned status of the call +@retval = 0, success. +@retval < 0, failed. +@note: + Usually this function is useful when the data type in the list is struct: get the value structure, + then use OdfGetVsByName to search inside it. +*/ +int odf_get_data_from_list(const struct ubios_od_list_info *list, + u16 index, struct ubios_od_value_struct *vs); + +/** +@brief Get next structure of a list. +@param[in] list list pointer which this data belongs to. +@param[in/out] vs current structure as input, next structure as output. +@return returned status of the call +@retval = 0, success. +@retval < 0, failed. +@note: + The caller must ensure the input structure is a member of the list; this function can only partially check this. +*/ +int odf_next_in_list(const struct ubios_od_list_info *list, struct ubios_od_value_struct *vs); + +/** +@brief Get a value from struct according to name, will check type first. +@param[in] vs standard structure of a struct +@param[in] name name of the data to get from the struct. +@param[out] value used to return value. +@return returned status of the call +@retval = 0, success. +@retval < 0, failed. +*/ +int odf_get_u8_from_struct(const struct ubios_od_value_struct *vs, char *name, u8 *value); +int odf_get_u16_from_struct(const struct ubios_od_value_struct *vs, char *name, u16 *value); +int odf_get_u32_from_struct(const struct ubios_od_value_struct *vs, char *name, u32 *value); +int odf_get_bool_from_struct(const struct ubios_od_value_struct *vs, char *name, bool *value); +int odf_get_table_from_struct(const struct ubios_od_value_struct *vs, + char *name, struct ubios_od_table_info *table); +int odf_get_list_from_struct(const struct ubios_od_value_struct *vs, + char *name, struct ubios_od_list_info *list); +int odf_get_list_from_table(u8 *table, char *path, struct ubios_od_list_info *list); +int odf_get_vs_from_table(u8 *table, char *path, struct ubios_od_value_struct *vs); +/** +@brief Check the od root's name and checksum, return whether it is valid. +@param[in] root start of od root +@return +@retval = true, it is valid. +@retval = false, it is invalid. +*/ +bool is_od_root_valid(struct ubios_od_root *root); + +/** +@brief Check the od file's checksum, return whether it is valid. +@param[in] file start of od file +@return +@retval = true, it is valid. +@retval = false, it is invalid. +*/ +bool is_od_file_valid(u8 *file); + +/** +@brief Search all pointers in od root, return the specific od file matching the input name.
+@param[in] root start of od root +@param[in] name name of od +@return +@retval = NULL, not found. +@retval != NULL, found. +*/ +u8 *odf_get_od_file(struct ubios_od_root *root, char *name); + +u8 odf_read8(u8 *address); +u16 odf_read16(u8 *address); +u32 odf_read32(u8 *address); +u64 odf_read64(u8 *address); + +u32 odf_checksum(u8 *data, u32 size); +bool odf_is_checksum_ok(struct ubios_od_header *header); +void odf_update_checksum(struct ubios_od_header *header); +int odf_separate_name(char **path, char *name, u64 max_len, u16 *index); +void odf_get_vs_by_pointer(u8 *data, struct ubios_od_value_struct *vs); + +#endif diff --git a/drivers/firmware/uvb/odf/odf_helper.c b/drivers/firmware/uvb/odf/odf_helper.c new file mode 100644 index 0000000000000000000000000000000000000000..c170d80c5018eda3850e746b959c7da1641f1075 --- /dev/null +++ b/drivers/firmware/uvb/odf/odf_helper.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: ODF helper functions, including data reading, checksum and path parsing + * Author: zhangrui + * Create: 2025-04-18 + */ +#define pr_fmt(fmt) "[UVB]: " fmt + +#include +#include +#include +#include "odf_interface.h" +#include "cis_uvb_interface.h" + +#define UBIOS_OD_INDEX_STRING_MAX 7 +#define DECIMAL 10 + +/* To ensure aligned access, read one byte at a time */ +static void odf_read(u8 *address, u8 *value, u64 size) +{ + u64 i; + + for (i = 0; i < size; i++) + value[i] = address[i]; +} + +u8 odf_read8(u8 *address) +{ + return *address; +} + +u16 odf_read16(u8 *address) +{ + u16 temp; + + odf_read(address, (u8 *)&temp, sizeof(u16)); + return temp; +} + +u32 odf_read32(u8 *address) +{ + u32 temp; + + odf_read(address, (u8 *)&temp, sizeof(u32)); + return temp; +} + +u64 odf_read64(u8 *address) +{ + u64 temp; + + odf_read(address, (u8 *)&temp, sizeof(u64)); + return temp; +} + +u32 odf_checksum(u8 *data, u32 size) +{ + u64 sum = 0; + u32 temp = size % sizeof(u32); + u64 i; + + for (i = 0; i < size - temp; i += sizeof(u32)) + sum += odf_read32(data + i); + + switch (temp) { + case 1: + sum += odf_read8(data + i); + break; + case 2: + sum += odf_read16(data + i); + break; + case 3: + sum += odf_read32(data + i) & 0x00FFFFFF; + break; + default: + break; + } + + return (~((u32)sum) + 1); +} + +/** +Only calculate the checksum over the valid data region +*/ +bool odf_is_checksum_ok(struct ubios_od_header *header) +{ + u32 checksum; + + checksum = odf_checksum((u8 *)header, header->total_size); + if (checksum == 0) + return true; + else + return false; +} + +void odf_update_checksum(struct ubios_od_header *header) +{ + header->checksum = 0; + header->checksum = odf_checksum((u8 *)header, header->total_size); +} + +/* +@brief Separate a name from path; + advance path to point just past this name, or set it to NULL when the path is finished. + Return an index if the name is followed by []; if the input index pointer is NULL, ignore it +@param[in] path a string to be separated +@param[out] name a name separated from path +@param[in] max_len max length of the name +@param[out] index if there is no index, 0xFFFF(-1) is returned +@return returned status of the call +@retval = 0, success. +@retval < 0, failed.
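+@note Example (behaviour inferred from the implementation below): for the path "group[2]/owner" the first call returns name "group" with index 2 and advances *path to "owner"; a second call returns "owner" with index 0xFFFF and sets *path to NULL.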
+*/ +int odf_separate_name(char **path, char *name, u64 max_len, u16 *index) +{ + char *c; + u64 i; + u64 j; + int ret; + char index_string[UBIOS_OD_INDEX_STRING_MAX] = {'\0'}; + bool is_index = false; + + if (!path || !name) + return -EINVAL; + + if (!*path) + return -EOPNOTSUPP; + + c = *path; + pr_debug("odf separate name: path[%s]\n", *path); + + /* if the first character is a separator, skip it */ + if (*c == UBIOS_OD_PATH_SEPARATOR) + c++; + + i = 0; + j = 0; + while ((i < max_len) && (j < UBIOS_OD_INDEX_STRING_MAX)) { + if (*c == UBIOS_OD_PATH_SEPARATOR || *c == '\0') { + name[i++] = '\0'; + if (index) { + ret = kstrtou16(index_string, DECIMAL, index); + if (ret) + *index = UBIOS_OD_INVALID_INDEX; + } + pr_debug("odf separate name: got name[%s]\n", name); + break; + } else if (*c == '[') { + is_index = true; + } else if (*c == ']') { + index_string[j++] = '\0'; + is_index = false; + } else { + if (is_index) + index_string[j++] = *c; + else + name[i++] = *c; + } + c++; + } + + if ((i > max_len) || (j >= UBIOS_OD_INDEX_STRING_MAX)) + return -EOVERFLOW; + + if (*c == '\0') + *path = NULL; + else + *path = c + 1; + + return 0; +} + +/** +@brief Get a name/value structrue by the data pointer +@param[in] data start address of data. +@param[out] vs used to return value structure. +*/ +void odf_get_vs_by_pointer(u8 *data, struct ubios_od_value_struct *vs) +{ + u8 *type_pointer = NULL; + u8 sizeof_length = 0; + + vs->name = (char *)data; + type_pointer = (u8 *)vs->name + strlen(vs->name) + 1; + vs->type = odf_read8(type_pointer); + switch (vs->type) { + case UBIOS_OD_TYPE_U8: + case UBIOS_OD_TYPE_S8: + case UBIOS_OD_TYPE_BOOL: + case UBIOS_OD_TYPE_CHAR: + vs->data_length = sizeof(u8); + vs->data = type_pointer + sizeof(u8); + break; + case UBIOS_OD_TYPE_U16: + case UBIOS_OD_TYPE_S16: + vs->data_length = sizeof(u16); + vs->data = type_pointer + sizeof(u8); + break; + case UBIOS_OD_TYPE_U32: + case UBIOS_OD_TYPE_S32: + vs->data_length = sizeof(u32); + vs->data = type_pointer + sizeof(u8); + break; + case UBIOS_OD_TYPE_U64: + case UBIOS_OD_TYPE_S64: + vs->data_length = sizeof(u64); + vs->data = type_pointer + sizeof(u8); + break; + case UBIOS_OD_TYPE_STRING: + vs->data = type_pointer + sizeof(u8); + vs->data_length = (u32)strlen(vs->data) + 1; + break; + default: + sizeof_length = sizeof(u32); + vs->data_length = odf_read32(type_pointer + sizeof(u8)); + vs->data = type_pointer + sizeof(u8) + sizeof_length; + break; + } +} + +bool is_od_root_valid(struct ubios_od_root *root) +{ + if (!root) { + pr_err("odf: root is NULL\n"); + return false; + } + + if (!odf_is_checksum_ok(&(root->header))) { + pr_err("odf: root checksum error.\n"); + return false; + } + + if (strcmp(root->header.name, UBIOS_OD_ROOT_NAME)) { + pr_err("odf: root name[%s] mismatch\n", root->header.name); + return false; + } + + return true; +} + +bool is_od_file_valid(u8 *file) +{ + struct ubios_od_header *header = (struct ubios_od_header *)file; + + if (!header) { + pr_err("odf: file is NULL\n"); + return false; + } + + if (!odf_is_checksum_ok(header)) { + pr_err("odf: file checksum error.\n"); + return false; + } + + return true; +} + +/** +@brief Search all pointer in od root, return the specific od file matched the input name. +@param[in] root start of od root +@param[in] name name of od +@return +@retval = NULL, not found. +@retval != NULL, found. 
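+@note Each non-zero entry of root->odfs[] points to an od file whose header begins with its name, so the lookup is a strcmp() against that name.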
+*/ +u8 *odf_get_od_file(struct ubios_od_root *root, char *name) +{ + u64 i; + + if (!is_od_root_valid(root)) + return NULL; + + if (!name) + return NULL; + + for (i = 0; i < root->count; i++) { + if (!root->odfs[i]) + continue; + + if (strcmp(name, (char *)(u64)root->odfs[i]) == 0) + return (u8 *)(u64)root->odfs[i]; + } + + return NULL; +} diff --git a/drivers/firmware/uvb/odf/odf_interface.h b/drivers/firmware/uvb/odf/odf_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..85d230934cdfc430497b65e9bc580343c435a2c2 --- /dev/null +++ b/drivers/firmware/uvb/odf/odf_interface.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: odf interface header + * Author: zhangrui + * Create: 2025-04-18 + */ +#ifndef ODF_INTERFACE_H +#define ODF_INTERFACE_H +#include + +#define UBIOS_OD_NAME_LEN_MAX 16 +#define UBIOS_OD_VERSION 1 + +#define UBIOS_OD_TYPE_U8 0x1 +#define UBIOS_OD_TYPE_U16 0x2 +#define UBIOS_OD_TYPE_U32 0x3 +#define UBIOS_OD_TYPE_U64 0x4 +#define UBIOS_OD_TYPE_S8 0x5 +#define UBIOS_OD_TYPE_S16 0x6 +#define UBIOS_OD_TYPE_S32 0x7 +#define UBIOS_OD_TYPE_S64 0x8 +#define UBIOS_OD_TYPE_BOOL 0x10 +#define UBIOS_OD_TYPE_CHAR 0x20 +#define UBIOS_OD_TYPE_STRING 0x21 +#define UBIOS_OD_TYPE_STRUCT 0x30 +#define UBIOS_OD_TYPE_TABLE 0x40 +#define UBIOS_OD_TYPE_FILE 0x50 +#define UBIOS_OD_TYPE_LIST 0x80 + +#define UBIOS_OD_ROOT_NAME "root_table" +#define UBIOS_OD_INVALID_INDEX 0xFFFF +#define UBIOS_OD_PATH_SEPARATOR '/' + +#define ODF_FILE_NAME_CALL_ID_SERVICE "call_id_service" +#define ODF_NAME_CIS_GROUP "group" +#define ODF_NAME_CIS_UB "ub" +#define ODF_NAME_CIS_OWNER "owner" +#define ODF_NAME_CIS_CIA "cia" +#define ODF_NAME_CIS_CALL_ID "call_id" +#define ODF_NAME_CIS_USAGE "usage" +#define ODF_NAME_CIS_INDEX "index" +#define ODF_NAME_CIS_FORWARDER_ID "forwarder" + +/* odf processing */ +#define ODF_FILE_NAME_VIRTUAL_BUS "virtual_bus" +#define ODF_NAME_UVB "uvb" +#define ODF_NAME_SECURE "secure" +#define ODF_NAME_DELAY "delay" +#define ODF_NAME_WD "wd" +#define ODF_NAME_OBTAIN "obtain" +#define ODF_NAME_ADDRESS "address" +#define ODF_NAME_BUFFER "buffer" +#define ODF_NAME_SIZE "size" + +/* UBRT table info */ +#define ACPI_SIG_UBRT "UBRT" +#define UBRT_UB_CONTROLLER 0 +#define UBRT_UMMU 1 +#define UBRT_UB_MEMORY 2 +#define UBRT_VIRTUAL_BUS 3 +#define UBRT_CALL_ID_SERVICE 4 + +struct ubios_od_value_struct { + char *name; + u8 type; + u32 data_length; + void *data; +}; + +struct ubios_od_header { + char name[16]; + u32 total_size; + u8 version; + u8 reserved[3]; + u32 remaining_size; + u32 checksum; +}; + +/* +Data structure of UBIOS OD Root Table show below: +|----ubios_od_root----| +| Header | +| count | +| reserved | +| odfs[0] | if not 0 --point to--> a od file +| odfs[...] 
| if not 0 --point to--> a od file +| odfs[count - 1] | if not 0 --point to--> a od file +*/ +struct ubios_od_root { + struct ubios_od_header header; + u16 count; + u8 reserved[6]; + u64 odfs[]; +}; + +struct ubios_od_table_info { + char *table_name; + u16 row; + u8 col; + char *sub_name_start; + void *value_start; + void *table_end; + u32 length_per_row; +}; + +struct ubios_od_list_info { + char *name; + u8 data_type; /* not include list type */ + u16 count; /* value count in the list */ + void *start; /* pointer to the first value in the list */ + void *end; /* end of list, not include */ +}; + +/** + * struct ubrt_sub_tables - UBRT Sub tables + * @type: type of tables + * @pointer: address to tables + */ +struct ubrt_sub_tables { + u8 type; + u8 reserved[7]; + u64 pointer; +}; + +/** + * struct ubios_ubrt_table - UBRT info + * @count: count of tables + * @sub tables: Sub tables[count] + */ +struct ubios_ubrt_table { + struct acpi_table_header header; + u32 count; + struct ubrt_sub_tables sub_tables[]; +}; + +int odf_get_fdt_ubiostbl(u64 *phys_addr, char *tbl); + +#endif diff --git a/drivers/firmware/uvb/odf/odf_trans.c b/drivers/firmware/uvb/odf/odf_trans.c new file mode 100644 index 0000000000000000000000000000000000000000..ac68c736db2055630e26926d1c1c464a0cd1377e --- /dev/null +++ b/drivers/firmware/uvb/odf/odf_trans.c @@ -0,0 +1,494 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: odf trans file + * Author: zhangrui + * Create: 2025-04-18 + */ +#define pr_fmt(fmt) "[UVB]: " fmt + +#include +#include +#include +#include "cis_uvb_interface.h" +#include "odf_interface.h" +#include "odf_handle.h" + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ODF Api"); + +struct cis_info *g_cis_info; +EXPORT_SYMBOL(g_cis_info); + +struct uvb_info *g_uvb_info; +EXPORT_SYMBOL(g_uvb_info); + +struct ubios_od_root *od_root; + +void free_cis_info(void) +{ + u32 i; + + if (!g_cis_info) + return; + + for (i = 0; i < (g_cis_info)->group_count; i++) { + if ((g_cis_info)->groups[i]) { + kfree((g_cis_info)->groups[i]); + (g_cis_info)->groups[i] = NULL; + } + } + kfree(g_cis_info); + g_cis_info = NULL; +} + +static struct cis_group *create_group_from_vs(struct ubios_od_value_struct *vs) +{ + struct ubios_od_list_info list; + struct cis_group *group; + int status; + int i; + + status = odf_get_list_from_struct(vs, ODF_NAME_CIS_CALL_ID, &list); + if (status) { + pr_err("create group: get [call id list] failed, err = %d\n", status); + return NULL; + } + group = kzalloc(sizeof(struct cis_group) + (sizeof(u32) * list.count), GFP_KERNEL); + if (!group) + return NULL; + + status = odf_get_u32_from_struct(vs, ODF_NAME_CIS_OWNER, &(group->owner_user_id)); + if (status) { + pr_err("create group: get [owner id] failed, err = %d\n", status); + goto fail; + } + + status = odf_get_u8_from_struct(vs, ODF_NAME_CIS_USAGE, &(group->usage)); + if (status) { + pr_err("create group: get [usage] failed, err = %d\n", status); + goto fail; + } + + status = odf_get_u8_from_struct(vs, ODF_NAME_CIS_INDEX, &(group->index)); + if (status) + pr_info("cis group not get [index], use default value\n"); + + status = odf_get_u32_from_struct(vs, ODF_NAME_CIS_FORWARDER_ID, &(group->forwarder_id)); + if (status) + pr_info("cis group not get forwarder, use default value\n"); + + group->cis_count = list.count; + for (i = 0; i < list.count; i++) { + status = odf_get_u32_from_list(&list, i, &(group->call_id[i])); + if (status) { + pr_err("create group: get each 
call id failed, err = %d\n", status); + goto fail; + } + } + + return group; + +fail: + kfree(group); + + return NULL; +} + +static int create_cis_info_from_odf(void) +{ + struct ubios_od_list_info list; + struct ubios_od_value_struct vs; + struct ubios_od_value_struct ub_vs; + struct ubios_ubrt_table *ubrt_table = NULL; + struct acpi_table_header *table = NULL; + u8 *sub_table = NULL; + struct ubios_od_header *header = NULL; + acpi_status status; + int i = 0; + int err = 0; + u32 sub_table_size = 0; + int ub_vs_err = 0; + + status = acpi_get_table(ACPI_SIG_UBRT, 0, &table); + if (ACPI_SUCCESS(status)) { + ubrt_table = (struct ubios_ubrt_table *)table; + + for (i = 0; i < ubrt_table->count; i++) { + if (ubrt_table->sub_tables[i].type == UBRT_CALL_ID_SERVICE) { + header = memremap(ubrt_table->sub_tables[i].pointer, + sizeof(struct ubios_od_header), MEMREMAP_WB); + if (!header) { + pr_err("failed to map cis table to od header in ACPI\n"); + return -ENOMEM; + } + sub_table_size = header->total_size; + memunmap(header); + sub_table = (u8 *)memremap(ubrt_table->sub_tables[i].pointer, + sub_table_size, MEMREMAP_WB); + break; + } + } + + if (!sub_table) { + pr_err("failed to get cis table address in ACPI\n"); + return -ENOMEM; + } + pr_info("get cis sub table success\n"); + + err = odf_get_list_from_table(sub_table, ODF_NAME_CIS_GROUP, &list); + if (err) { + pr_err("create cis info from odf failed, group not found, err = %d\n", + err); + goto free_sub_table; + } + + ub_vs_err = odf_get_vs_from_table(sub_table, ODF_NAME_CIS_UB, &ub_vs); + } else { + err = odf_get_list(od_root, + ODF_FILE_NAME_CALL_ID_SERVICE "/" ODF_NAME_CIS_GROUP, &list); + if (err) { + pr_err("create cis info from odf failed, group not found, err = %d\n", + err); + return err; + } + + ub_vs_err = odf_get_struct(od_root, + ODF_FILE_NAME_CALL_ID_SERVICE "/" ODF_NAME_CIS_UB, &ub_vs); + } + + g_cis_info = kzalloc(sizeof(struct cis_info) + (sizeof(void *) * list.count), GFP_KERNEL); + if (!g_cis_info) { + err = -ENOMEM; + goto free_sub_table; + } + g_cis_info->group_count = list.count; + + err = odf_get_data_from_list(&list, 0, &vs); + if (err) { + pr_err("create cis info from odf failed: get data from CIS group failed, err = %d\n", + err); + goto fail; + } + for (i = 0; i < list.count; i++) { + g_cis_info->groups[i] = create_group_from_vs(&vs); + if (!g_cis_info->groups[i]) { + pr_err("create cis group from odf failed\n"); + err = -ENODATA; + goto fail; + } + (void)odf_next_in_list(&list, &vs); + } + + if (!ub_vs_err) { + pr_info("found ub struct in cis info\n"); + err = odf_get_u8_from_struct(&ub_vs, ODF_NAME_CIS_USAGE, &(g_cis_info->ub.usage)); + if (err) { + pr_err("create group: get [usage] failed, err = %d\n", status); + goto fail; + } + + err = odf_get_u8_from_struct(&ub_vs, ODF_NAME_CIS_INDEX, &(g_cis_info->ub.index)); + if (err) + pr_warn("ub struct not get [index], use default value\n"); + + err = odf_get_u32_from_struct(&ub_vs, ODF_NAME_CIS_FORWARDER_ID, + &(g_cis_info->ub.forwarder_id)); + if (err) + pr_warn("ub struct not get forwarder, use default value\n"); + } else + pr_warn("not found ub struct in cis info\n"); + + if (sub_table) + memunmap(sub_table); + + pr_info("get cis table from odf success\n"); + + return 0; + +fail: + free_cis_info(); +free_sub_table: + if (sub_table) + memunmap(sub_table); + + return err; +} + +static void free_uvb_info(void) +{ + u16 i; + + if (!g_uvb_info) + return; + + for (i = 0; i < (g_uvb_info)->uvb_count; i++) { + if ((g_uvb_info)->uvbs[i]) { + kfree((g_uvb_info)->uvbs[i]); + 
(g_uvb_info)->uvbs[i] = NULL; + } + } + + kfree(g_uvb_info); + g_uvb_info = NULL; +} + +static struct uvb *create_uvb_from_vs(const struct ubios_od_value_struct *vs) +{ + struct uvb *temp_uvb; + struct ubios_od_table_info wd; + int status; + u16 row; + + status = odf_get_table_from_struct(vs, ODF_NAME_WD, &wd); + if (status) { + pr_err("create uvb info: get [wd] failed, [%d]\n", status); + return NULL; + } + temp_uvb = kzalloc(sizeof(struct uvb) + + sizeof(struct uvb_window_description) * wd.row, GFP_KERNEL); + if (!temp_uvb) + return NULL; + + if (wd.row > UVB_WINDOW_COUNT_MAX) { + pr_err("create uvb info: uvb window count[%d] error.\n", wd.row); + goto fail; + } + temp_uvb->window_count = (u8)wd.row; + (void)odf_get_bool_from_struct(vs, ODF_NAME_SECURE, &temp_uvb->secure); + (void)odf_get_u16_from_struct(vs, ODF_NAME_DELAY, &temp_uvb->delay); + for (row = 0; row < wd.row; row++) { + status = odf_get_u64_from_table(&wd, + row, ODF_NAME_OBTAIN, &(temp_uvb->wd[row].obtain)); + if (status) { + pr_err("create uvb info: get [obtain] failed, %d.\n", status); + goto fail; + } + status = odf_get_u64_from_table(&wd, + row, ODF_NAME_ADDRESS, &(temp_uvb->wd[row].address)); + if (status) { + pr_err("create uvb info: get [address] failed, %d.\n", status); + goto fail; + } + (void)odf_get_u64_from_table(&wd, + row, ODF_NAME_BUFFER, &(temp_uvb->wd[row].buffer)); + (void)odf_get_u32_from_table(&wd, row, ODF_NAME_SIZE, &(temp_uvb->wd[row].size)); + } + + return temp_uvb; +fail: + kfree(temp_uvb); + + return NULL; +} + +static int create_uvb_info_from_odf(void) +{ + struct ubios_od_list_info uvb_list; + struct ubios_od_value_struct vs; + struct ubios_ubrt_table *ubrt_table = NULL; + struct acpi_table_header *table = NULL; + u8 *sub_table = NULL; + struct ubios_od_header *header = NULL; + acpi_status status; + int i = 0; + int err = 0; + u32 sub_table_size = 0; + + status = acpi_get_table(ACPI_SIG_UBRT, 0, &table); + if (ACPI_SUCCESS(status)) { + ubrt_table = (struct ubios_ubrt_table *)table; + for (i = 0; i < ubrt_table->count; i++) { + if (ubrt_table->sub_tables[i].type == UBRT_VIRTUAL_BUS) { + header = memremap(ubrt_table->sub_tables[i].pointer, + sizeof(struct ubios_od_header), MEMREMAP_WB); + if (!header) { + pr_err("failed to map uvb table to od header in ACPI\n"); + return -ENOMEM; + } + sub_table_size = header->total_size; + memunmap(header); + sub_table = (u8 *)memremap(ubrt_table->sub_tables[i].pointer, + sub_table_size, MEMREMAP_WB); + break; + } + } + + if (!sub_table) { + pr_err("failed to get uvb table address in ACPI\n"); + return -ENOMEM; + } + pr_info("get uvb sub table suceess\n"); + + err = odf_get_list_from_table(sub_table, ODF_NAME_UVB, &uvb_list); + if (err) { + pr_err("create uvb info: find uvb from od failed, err = %d\n", err); + goto free_sub_table; + } + } else { + err = odf_get_list(od_root, ODF_FILE_NAME_VIRTUAL_BUS "/" ODF_NAME_UVB, &uvb_list); + if (err) { + pr_err("create uvb info: find uvb from od failed, err = %d\n", err); + return err; + } + } + + g_uvb_info = kzalloc(sizeof(struct uvb_info) + sizeof(void *) * uvb_list.count, GFP_KERNEL); + if (!g_uvb_info) { + err = -ENOMEM; + goto free_sub_table; + } + if (uvb_list.count > UVB_WINDOW_COUNT_MAX) { + pr_err("create uvb info: uvb count[%d] error.\n", uvb_list.count); + err = -EOVERFLOW; + goto fail; + } + g_uvb_info->uvb_count = (u8)uvb_list.count; + err = odf_get_data_from_list(&uvb_list, 0, &vs); + if (err) { + pr_err("create uvb info: get uvb failed [%d]\n", err); + goto fail; + } + for (i = 0; i < uvb_list.count; i++) { 
+ g_uvb_info->uvbs[i] = create_uvb_from_vs(&vs); + if (!g_uvb_info->uvbs[i]) { + pr_err("create uvb from odf failed\n"); + err = -EINVAL; + goto fail; + } + (void)odf_next_in_list(&uvb_list, &vs); + } + if (sub_table) + memunmap(sub_table); + + pr_info("get uvb table from odf success\n"); + + return 0; + +fail: + free_uvb_info(); +free_sub_table: + if (sub_table) + memunmap(sub_table); + + return err; +} + +static void free_odf_info(void) +{ + kfree(od_root); + od_root = NULL; +} + +static int create_odf_info(void) +{ + u64 od_root_phys = 0; /* physical address */ + struct ubios_od_root *od_root_origin = NULL; /* virtual address */ + struct acpi_table_header *ubrt_header = NULL; + u32 od_root_size = 0; + int i = 0; + acpi_status status; + int ret = 0; + u16 count = 0; + + status = acpi_get_table(ACPI_SIG_UBRT, 0, &ubrt_header); + if (ACPI_SUCCESS(status)) { + pr_info("Success fully get UBRT table\n"); + return 0; + } + ret = odf_get_fdt_ubiostbl(&od_root_phys, "linux,ubiostbl"); + if (ret) { + pr_err("from fdt get ubiostbl failed\n"); + return -1; + } + + od_root_origin = (struct ubios_od_root *) + memremap(od_root_phys, sizeof(struct ubios_od_header), MEMREMAP_WB); + if (!od_root_origin) { + pr_err("od_root header memremap failed, od_root addr=%016llx\n", od_root_phys); + return -1; + } + od_root_size = od_root_origin->header.total_size; + memunmap((void *)od_root_origin); + + od_root_origin = (struct ubios_od_root *)memremap(od_root_phys, od_root_size, MEMREMAP_WB); + if (!od_root_origin) { + pr_err("od_root memremap failed, od_root addr=%016llx\n", od_root_phys); + return -1; + } + + count = od_root_origin->count; + od_root = kzalloc(sizeof(struct ubios_od_root) + count * sizeof(u64), GFP_KERNEL); + if (!od_root) { + pr_err("kmalloc od_root failed\n"); + goto free_od_root; + } + memcpy(&od_root->header, &od_root_origin->header, sizeof(struct ubios_od_header)); + od_root->count = od_root_origin->count; + + for (i = 0; i < od_root->count; i++) { + if (!od_root_origin->odfs[i]) + continue; + + od_root->odfs[i] = od_root_origin->odfs[i]; + } + if (od_root_origin) + memunmap(od_root_origin); + + odf_update_checksum(&od_root->header); + pr_info("get ubios table success\n"); + + return 0; + +free_od_root: + if (od_root_origin) + memunmap(od_root_origin); + + return -1; +} + +static int __init odf_init(void) +{ + int status; + + status = create_odf_info(); + if (status) { + pr_err("odf table init failed\n"); + return -1; + } + + status = create_cis_info_from_odf(); + if (status) { + pr_err("create cis info failed, cis is invalid\n"); + goto free_odf; + } + + status = create_uvb_info_from_odf(); + if (status) { + pr_err("create uvb info failed, uvb is invalid\n"); + goto free_cis; + } + + pr_info("odf init success\n"); + + return 0; + +free_cis: + free_cis_info(); +free_odf: + free_odf_info(); + + return -1; +} + +static void __exit odf_exit(void) +{ + free_uvb_info(); + free_cis_info(); + free_odf_info(); + + pr_info("odf exit success\n"); +} + +module_init(odf_init); +module_exit(odf_exit); diff --git a/drivers/ub/Kconfig b/drivers/ub/Kconfig index 946067487c42b18de40b0edc40e65b646b36de40..0b7cb0ef16cf335b25930602f3425c0cb428e8cd 100644 --- a/drivers/ub/Kconfig +++ b/drivers/ub/Kconfig @@ -16,7 +16,8 @@ if UB source "drivers/ub/ubus/Kconfig" source "drivers/ub/ubfi/Kconfig" source "drivers/ub/ubase/Kconfig" - +source "drivers/ub/obmm/Kconfig" +source "drivers/ub/sentry/Kconfig" config UB_URMA tristate "Unified Bus (UB) urma support" default m diff --git a/drivers/ub/Makefile 
b/drivers/ub/Makefile index c1fd5627ca9b693952f0f364fb7149d6f92d5e32..d1dd2267abe0f2eaf2f9d4952ddd4a5876cb49f3 100644 --- a/drivers/ub/Makefile +++ b/drivers/ub/Makefile @@ -4,3 +4,5 @@ obj-y += ubus/ obj-y += ubfi/ obj-$(CONFIG_UB_URMA) += urma/ obj-$(CONFIG_UB_UBASE) += ubase/ +obj-y += obmm/ +obj-$(CONFIG_UB_SENTRY) += sentry/ diff --git a/drivers/ub/obmm/Kconfig b/drivers/ub/obmm/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..85d2eff555e16a3c4c4b616dcfb3185d73398fe7 --- /dev/null +++ b/drivers/ub/obmm/Kconfig @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0-only +config OBMM + default n + tristate "OBMM(ownership based memory management) Support" + depends on UB_UMMU_CORE && UB_UBUS && HISI_SOC_CACHE + select NUMA_REMOTE + select PFN_RANGE_ALLOC + select RECLAIM_NOTIFY + help + OBMM (Ownership Based Memory Management) provides a framework for + managing shared memory regions across multiple systems. + It supports both memory import (accessing remote memory) and export + (making local memory visible across systems) operations with proper + NUMA integration and provides capability of cross-supernode cache + consistency maintenance. + If unsure, say N. \ No newline at end of file diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..e5f25776ce871d672498d11f7a4600919bfdaf9e --- /dev/null +++ b/drivers/ub/obmm/Makefile @@ -0,0 +1,11 @@ + +# SPDX-License-Identifier: GPL-2.0+ + +obmm-y := obmm_core.o \ + obmm_shm_dev.o obmm_sysfs_ub.o obmm_cache.o obmm_import.o \ + obmm_export_from_user.o obmm_ownership.o conti_mem_allocator.o \ + obmm_export.o obmm_export_from_pool.o obmm_preimport.o \ + ubmempool_allocator.o obmm_lowmem.o obmm_export_region_ops.o \ + obmm_addr_check.o obmm_preimport_prefilled.o obmm_resource.o + +obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/conti_mem_allocator.c b/drivers/ub/obmm/conti_mem_allocator.c new file mode 100644 index 0000000000000000000000000000000000000000..8ca482f5dfa5fbc8939c8c942d690028d0bde979 --- /dev/null +++ b/drivers/ub/obmm/conti_mem_allocator.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. 
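+ * + * The allocator keeps a per-node pool of fixed-granularity memory segments on three lists + * (uncleared, ready and poisoned) and drives two kernel threads: a clear thread that clears + * uncleared segments in the background and a pool thread that expands, contracts and frees + * poisoned segments through the registered conti_mempool_ops callbacks.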
+ */ +#define pr_fmt(fmt) "OBMM: conti_mem:" fmt +#include +#include +#include + +#include "conti_mem_allocator.h" + +static atomic_t pool_thread_should_pause = ATOMIC_INIT(0); + +static int conti_clear_memseg(struct conti_mem_allocator *a, struct memseg_node *node) +{ + if (a->ops->clear_memseg) + return a->ops->clear_memseg(a, node); + return -EOPNOTSUPP; +} + +static void conti_pool_free_memseg(struct conti_mem_allocator *a, struct memseg_node *node) +{ + if (a->ops->pool_free_memseg) { + pr_debug("free memseg: nid %d\n", a->nid); + a->ops->pool_free_memseg(a, node); + } +} + +static struct memseg_node *conti_pool_alloc_memseg(struct conti_mem_allocator *a) +{ + if (a->ops->pool_alloc_memseg) { + pr_debug("alloc memseg: nid %d\n", a->nid); + return a->ops->pool_alloc_memseg(a); + } else { + return NULL; + } +} + +static bool conti_has_poisoned_memseg(struct conti_mem_allocator *a) +{ + /* this lockless read is safe and is intended */ + return !list_empty(&a->memseg_poisoned); +} + +static bool conti_need_contract(struct conti_mem_allocator *a) +{ + if (a->ops->need_contract) + return a->ops->need_contract(a); + else + return false; +} + +static size_t conti_contract_size(struct conti_mem_allocator *a) +{ + if (a->ops->contract_size) + return a->ops->contract_size(a); + else + return 0; +} + +static bool conti_need_expand(struct conti_mem_allocator *a) +{ + if (a->ops->need_expand) + return a->ops->need_expand(a); + else + return false; +} + +static size_t conti_expand_size(struct conti_mem_allocator *a) +{ + if (a->ops->expand_size) + return a->ops->expand_size(a); + else + return 0; +} + +size_t conti_mem_allocator_expand(struct conti_mem_allocator *allocator, size_t size) +{ + unsigned long count, flags; + struct memseg_node *node; + size_t expand_size; + + if (size == 0 || size % allocator->granu) { + pr_err("size %#zx is zero or not aligned with allocator->granu.\n", size); + return 0; + } + + count = size / allocator->granu; + while (count > 0 && atomic_read(&pool_thread_should_pause) == 0) { + node = conti_pool_alloc_memseg(allocator); + if (!node) + break; + + spin_lock_irqsave(&allocator->lock, flags); + list_add_tail(&node->list, &allocator->memseg_uncleared); + spin_unlock_irqrestore(&allocator->lock, flags); + count--; + } + + if (allocator->clear_work) + wake_up_interruptible(&allocator->clear_wq); + + expand_size = size - count * allocator->granu; + atomic64_add(expand_size, &allocator->pooled_mem_size); + if (expand_size > 0) + pr_debug("%s: expand expect size %#zx, actual size %#zx\n", current->comm, size, + expand_size); + + return expand_size; +} + +size_t conti_mem_allocator_contract(struct conti_mem_allocator *allocator, size_t size) +{ + struct list_head contract_list; + struct memseg_node *node, *tmp; + unsigned long count, flags; + size_t contract_size; + + if (size == 0 || size % allocator->granu) { + pr_err_ratelimited("size %#zx is zero or not aligned with allocator->granu.\n", + size); + return 0; + } + + count = size / allocator->granu; + if (allocator->ops->pool_free_memseg == NULL) + return 0; + + INIT_LIST_HEAD(&contract_list); + spin_lock_irqsave(&allocator->lock, flags); + list_for_each_entry_safe(node, tmp, &allocator->memseg_uncleared, list) { + list_move_tail(&node->list, &contract_list); + count--; + if (count == 0) + goto done; + } + + list_for_each_entry_safe(node, tmp, &allocator->memseg_ready, list) { + list_move_tail(&node->list, &contract_list); + count--; + if (count == 0) + goto done; + } + +done: + spin_unlock_irqrestore(&allocator->lock, 
flags); + list_for_each_entry_safe(node, tmp, &contract_list, list) { + list_del(&node->list); + conti_pool_free_memseg(allocator, node); + } + contract_size = size - count * allocator->granu; + atomic64_sub(contract_size, &allocator->pooled_mem_size); + if (contract_size > 0) + pr_debug("%s: nid: %d, contract expect size %#zx, actual size %#zx\n", + current->comm, allocator->nid, size, contract_size); + + return contract_size; +} + +static size_t conti_mem_allocator_free_poisoned(struct conti_mem_allocator *allocator) +{ + LIST_HEAD(free_list); + struct memseg_node *node, *tmp; + size_t free_size = 0; + unsigned long flags; + + if (allocator->ops->pool_free_memseg == NULL) { + pr_debug("%s: no means to free poisoned memseg.\n", __func__); + return 0; + } + + spin_lock_irqsave(&allocator->lock, flags); + list_splice_init(&allocator->memseg_poisoned, &free_list); + spin_unlock_irqrestore(&allocator->lock, flags); + + list_for_each_entry_safe(node, tmp, &free_list, list) { + list_del(&node->list); + conti_pool_free_memseg(allocator, node); + free_size += allocator->granu; + } + /* The memory freed by this function has already been subtracted from pooled memory size + * when isolated. + */ + if (free_size > 0) + pr_debug("%s: nid: %d, %#zx poisoned memory freed\n", current->comm, allocator->nid, + free_size); + + return free_size; +} + +void conti_free_memory(struct conti_mem_allocator *allocator, struct list_head *head) +{ + size_t freed_size = 0; + struct memseg_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, head, list) { + freed_size += allocator->granu; + list_del(&node->list); + conti_pool_free_memseg(allocator, node); + pr_debug("allocator: freed: %d: 0x%llx + 0x%lx\n", allocator->nid, node->addr, + node->size); + } + + atomic64_sub(freed_size, &allocator->pooled_mem_size); + atomic64_sub(freed_size, &allocator->used_mem_size); + pr_debug("%s: freed_size %#zx on node %d.\n", current->comm, freed_size, allocator->nid); +} + +static size_t conti_alloc_memory_slow(struct conti_mem_allocator *allocator, size_t size, + struct list_head *head, bool clear) +{ + struct memseg_node *node; + size_t allocated = 0; + int ret; + + while (size) { + node = conti_pool_alloc_memseg(allocator); + if (!node) + break; + + if (clear) { + ret = conti_clear_memseg(allocator, node); + if (ret < 0) { + conti_pool_free_memseg(allocator, node); + break; + } + } + allocated += allocator->granu; + list_add_tail(&node->list, head); + size -= allocator->granu; + } + + atomic64_add(allocated, &allocator->pooled_mem_size); + atomic64_add(allocated, &allocator->used_mem_size); + pr_info("%s: slow allocated %#zx from node %d\n", current->comm, allocated, allocator->nid); + return allocated; +} + +size_t conti_alloc_memory(struct conti_mem_allocator *allocator, size_t size, + struct list_head *head, bool clear, bool allow_slow) +{ + struct list_head *first, *second, *entry, temp_list; + struct memseg_node *node; + size_t allocated = 0, available; + unsigned long flags; + + atomic_inc(&pool_thread_should_pause); + INIT_LIST_HEAD(&temp_list); + if (clear) { + first = &allocator->memseg_ready; + second = &allocator->memseg_uncleared; + } else { + second = &allocator->memseg_ready; + first = &allocator->memseg_uncleared; + } + + spin_lock_irqsave(&allocator->lock, flags); + available = conti_get_avail(allocator); + if (!allow_slow && available < size) { + pr_err("%s:fast alloc failed. 
nid: %d, request: 0x%lx, available: 0x%lx\n", + __func__, allocator->nid, size, available); + spin_unlock_irqrestore(&allocator->lock, flags); + goto out_continue_pool; + } + list_for_each(entry, first) { + if (allocated >= size) + break; + allocated += allocator->granu; + pr_debug("alloc 1 node from %s list.\n", clear ? "cleared" : "uncleared"); + } + list_cut_before(head, first, entry); + + list_for_each(entry, second) { + if (allocated >= size) + break; + allocated += allocator->granu; + pr_debug("alloc 1 node from %s list.\n", !clear ? "cleared" : "uncleared"); + } + list_cut_before(&temp_list, second, entry); + spin_unlock_irqrestore(&allocator->lock, flags); + + atomic64_add(allocated, &allocator->used_mem_size); + + /* now: head collects elements from the first list, temp_list holds elements form the + * second list and clearing node. When the caller requests for cleared data, all nodes in + * temp_list should be cleared synchronously. + */ + if (clear) + list_for_each_entry(node, &temp_list, list) + conti_clear_memseg(allocator, node); + list_splice(&temp_list, head); + + if (allocated < size) + allocated += conti_alloc_memory_slow(allocator, size - allocated, head, clear); + + list_for_each_entry(node, head, list) { + pr_debug("allocator: allocated: %d: 0x%llx + 0x%lx\n", allocator->nid, node->addr, + node->size); + } + pr_info("%s: allocated %#zx from node %d\n", current->comm, allocated, allocator->nid); + +out_continue_pool: + atomic_dec(&pool_thread_should_pause); + + /* not aligned */ + WARN_ON(allocated > size); + + return allocated; +} + +bool conti_mem_allocator_isolate_memseg(struct conti_mem_allocator *a, unsigned long addr) +{ + struct memseg_node *node; + bool found = false; + unsigned long flags; + + if (!a->initialized) + return false; + addr = ALIGN_DOWN(addr, a->granu); + spin_lock_irqsave(&a->lock, flags); + list_for_each_entry(node, &a->memseg_ready, list) { + if (node->addr == addr) { + pr_debug("isolate memseg from cleared pool.\n"); + list_move(&node->list, &a->memseg_poisoned); + found = true; + goto out; + } + } + list_for_each_entry(node, &a->memseg_uncleared, list) { + if (node->addr == addr) { + pr_debug("isolate memseg from uncleared pool.\n"); + list_move(&node->list, &a->memseg_poisoned); + found = true; + goto out; + } + } + if (a->memseg_clearing && a->memseg_clearing->addr == addr) + pr_warn("memseg to isolate is being cleared; isolation failed.\n"); + else + pr_debug("memseg to isolate not found in pooled allocator of nid=%d.\n", a->nid); + +out: + spin_unlock_irqrestore(&a->lock, flags); + if (found) + atomic64_sub(a->granu, &a->pooled_mem_size); + return found; +} + +static int conti_clear_thread(void *p) +{ + struct conti_mem_allocator *allocator = p; + struct memseg_node *node; + int ret; + unsigned long flags; + + pr_debug("%s: nid=%d, start\n", __func__, allocator->nid); + allocator->memseg_clearing = NULL; + while (!kthread_should_stop()) { + wait_event_interruptible(allocator->clear_wq, + !list_empty(&allocator->memseg_uncleared) || + kthread_should_stop()); + + if (kthread_should_stop()) + break; + spin_lock_irqsave(&allocator->lock, flags); + if (list_empty(&allocator->memseg_uncleared)) { + spin_unlock_irqrestore(&allocator->lock, flags); + continue; + } + + node = list_first_entry(&allocator->memseg_uncleared, struct memseg_node, list); + list_del(&node->list); + allocator->memseg_clearing = node; + + spin_unlock_irqrestore(&allocator->lock, flags); + ret = conti_clear_memseg(allocator, node); + + spin_lock_irqsave(&allocator->lock, 
flags); + allocator->memseg_clearing = NULL; + if (ret) + list_add(&node->list, &allocator->memseg_uncleared); + else + list_add(&node->list, &allocator->memseg_ready); + spin_unlock_irqrestore(&allocator->lock, flags); + } + pr_debug("%s: nid=%d, exit\n", __func__, allocator->nid); + + return 0; +} + +static int clear_thread_init(struct conti_mem_allocator *allocator) +{ + struct task_struct *work; + + work = kthread_create_on_node(conti_clear_thread, allocator, allocator->nid, + "conti_clear_%s", allocator->name); + if (IS_ERR(work)) { + pr_err("failed to init conti_clear task\n"); + return -ENODEV; + } + (void)wake_up_process(work); + + allocator->clear_work = work; + + return 0; +} + +#define POOL_THREAD_SLEEP_JIFFIES msecs_to_jiffies(5000) +static int conti_pool_thread(void *p) +{ + struct conti_mem_allocator *allocator = p; + size_t size, ret_size; + + pr_debug("%s: nid=%d, start\n", __func__, allocator->nid); + while (!kthread_should_stop()) { + wait_event_interruptible_timeout(allocator->pool_wq, + atomic_read(&pool_thread_should_pause) == 0 && + (conti_has_poisoned_memseg(allocator) || + conti_need_contract(allocator) || + conti_need_expand(allocator) || + kthread_should_stop()), + POOL_THREAD_SLEEP_JIFFIES); + + if (kthread_should_stop()) + break; + + if (conti_has_poisoned_memseg(allocator)) { + ret_size = conti_mem_allocator_free_poisoned(allocator); + pr_debug("%s: nid=%d, free poisoned done, ret=%#zx\n", __func__, + allocator->nid, ret_size); + } + + if (conti_need_contract(allocator)) { + size = conti_contract_size(allocator); + if (size > 0) { + pr_debug("%s: nid=%d, size=%#lx start contract\n", __func__, + allocator->nid, size); + ret_size = conti_mem_allocator_contract(allocator, size); + if (ret_size) + pr_debug("%s: nid=%d, contract done, ret=%#zx\n", __func__, + allocator->nid, ret_size); + } + } + + if (conti_need_expand(allocator)) { + size = conti_expand_size(allocator); + if (size > 0) { + pr_debug("%s: nid=%d, start expand\n", __func__, allocator->nid); + ret_size = conti_mem_allocator_expand(allocator, size); + if (ret_size) + pr_debug("%s: nid=%d, expand done, ret=%#zx\n", __func__, + allocator->nid, ret_size); + } + } + } + pr_debug("%s: nid=%d, exit\n", __func__, allocator->nid); + + return 0; +} + +static int pool_thread_init(struct conti_mem_allocator *allocator) +{ + struct task_struct *work; + + init_waitqueue_head(&allocator->pool_wq); + work = kthread_create_on_node(conti_pool_thread, allocator, allocator->nid, "conti_pool_%s", + allocator->name); + if (IS_ERR(work)) { + pr_err("failed to init conti_pool task\n"); + return -ENODEV; + } + (void)wake_up_process(work); + + allocator->pool_work = work; + + return 0; +} + +int conti_mem_allocator_init(struct conti_mem_allocator *allocator, int nid, size_t granu, + const struct conti_mempool_ops *ops, const char *fmt, ...) 
+{ + va_list ap; + int ret; + + if (!allocator || !ops) { + pr_err("%s: null pointer.\n", __func__); + return -EINVAL; + } + if (!ops->need_expand || !ops->expand_size) { + pr_err("expand ops is required.\n"); + return -EINVAL; + } + if (!IS_ALIGNED(granu, PAGE_SIZE) || granu == 0) { + pr_err("invalid granu size %#lx.\n", granu); + return -EINVAL; + } + + va_start(ap, fmt); + allocator->name = kvasprintf(GFP_KERNEL, fmt, ap); + va_end(ap); + if (!allocator->name) + return -ENOMEM; + + allocator->nid = nid; + allocator->granu = granu; + atomic64_set(&allocator->pooled_mem_size, 0); + atomic64_set(&allocator->used_mem_size, 0); + spin_lock_init(&allocator->lock); + INIT_LIST_HEAD(&allocator->memseg_ready); + init_waitqueue_head(&allocator->clear_wq); + INIT_LIST_HEAD(&allocator->memseg_uncleared); + allocator->memseg_clearing = NULL; + INIT_LIST_HEAD(&allocator->memseg_poisoned); + + allocator->ops = ops; + + if (ops->clear_memseg) { + ret = clear_thread_init(allocator); + if (ret) { + kfree(allocator->name); + allocator->name = NULL; + return ret; + } + } + + ret = pool_thread_init(allocator); + if (ret) { + if (allocator->clear_work) + kthread_stop(allocator->clear_work); + kfree(allocator->name); + allocator->name = NULL; + return ret; + } + + allocator->initialized = true; + + return 0; +} + +void conti_mem_allocator_deinit(struct conti_mem_allocator *allocator) +{ + struct memseg_node *node, *tmp; + struct list_head free_list; + unsigned long flags; + + INIT_LIST_HEAD(&free_list); + if (allocator->pool_work) + kthread_stop(allocator->pool_work); + + if (allocator->clear_work) + kthread_stop(allocator->clear_work); + + kfree(allocator->name); + if (!allocator->ops->pool_free_memseg) { + pr_err("pool_free_memseg is not defined.\n"); + return; + } + + /* Release all memory nodes chained in memseg_uncleared, memseg_ready + * and memseg_poisoned. + * NOTE: No memory node will be held in allocator->memseg_clearing after + * the clear worker stops working. + */ + spin_lock_irqsave(&allocator->lock, flags); + list_splice(&allocator->memseg_uncleared, &free_list); + list_splice(&allocator->memseg_ready, &free_list); + list_splice(&allocator->memseg_poisoned, &free_list); + spin_unlock_irqrestore(&allocator->lock, flags); + + list_for_each_entry_safe(node, tmp, &free_list, list) { + list_del(&node->list); + conti_pool_free_memseg(allocator, node); + } + memset(allocator, 0, sizeof(*allocator)); +} diff --git a/drivers/ub/obmm/conti_mem_allocator.h b/drivers/ub/obmm/conti_mem_allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..84cf24b64b4b62b3585f964ef39e881e018b7974 --- /dev/null +++ b/drivers/ub/obmm/conti_mem_allocator.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#ifndef CONTI_MEM_ALLOC +#define CONTI_MEM_ALLOC + +#include +#include +#include +#include + +struct memseg_node { + phys_addr_t addr; + size_t size; + struct list_head list; +}; + +struct conti_mem_allocator; + +/** + * struct conti_mempool_ops - Memory pool operation callbacks for the allocator + * + * This structure defines a set of callback functions that customize the + * behavior of the memory allocator for different memory management strategies. + * Each function pointer implements specific operations required for memory + * allocation, deallocation, and pool management. 
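+ * + * Only @need_expand and @expand_size are mandatory: conti_mem_allocator_init() rejects an + * ops table without them. The remaining callbacks may be left NULL and are checked before use.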
+ * + * @clear_memseg: Clear the memory segment's data (e.g., zeroing or secure erase) + * @allocator: Pointer to the memory allocator instance + * @memseg: Memory segment to be cleared + * Return: 0 for success, or an error on failure + * + * @pool_free_memseg: Return a memory segment to the pool for reuse + * @allocator: Pointer to the memory allocator instance + * @memseg: Memory segment to be freed back to the pool + * + * @pool_alloc_memseg: Allocate a new memory segment from the underlying memory source + * @allocator: Pointer to the memory allocator instance + * Return: A newly allocated memory segment, or NULL on failure + * + * @need_contract: Check if the memory pool should be shrunk + * @allocator: Pointer to the memory allocator instance + * Return: true if contraction is needed, false otherwise + * + * @contract_size: Calculate the size to contract the memory pool + * @allocator: Pointer to the memory allocator instance + * Return: The size (in bytes) to reduce the pool, or 0 if no contraction + * + * @need_expand: Check if the memory pool should be expanded + * @allocator: Pointer to the memory allocator instance + * Return: true if expansion is needed, false otherwise + * + * @expand_size: Calculate the size to expand the memory pool + * @allocator: Pointer to the memory allocator instance + * Return: The size (in bytes) to increase the pool, or 0 if no expansion + */ +struct conti_mempool_ops { + int (*clear_memseg)(struct conti_mem_allocator *allocator, struct memseg_node *node); + void (*pool_free_memseg)(struct conti_mem_allocator *allocator, struct memseg_node *node); + struct memseg_node *(*pool_alloc_memseg)(struct conti_mem_allocator *allocator); + bool (*need_contract)(struct conti_mem_allocator *allocator); + size_t (*contract_size)(struct conti_mem_allocator *allocator); + bool (*need_expand)(struct conti_mem_allocator *allocator); + size_t (*expand_size)(struct conti_mem_allocator *allocator); +}; + +struct conti_mem_allocator { + bool initialized; + + int nid; + size_t granu; + + atomic64_t pooled_mem_size; + atomic64_t used_mem_size; + + spinlock_t lock; + struct list_head memseg_ready; + struct list_head memseg_uncleared; + struct memseg_node *memseg_clearing; + struct list_head memseg_poisoned; + + struct task_struct *clear_work; + struct wait_queue_head clear_wq; + + struct task_struct *pool_work; + struct wait_queue_head pool_wq; + + const struct conti_mempool_ops *ops; + const char *name; +}; + +static inline size_t conti_get_total(struct conti_mem_allocator *a) +{ + return atomic64_read(&a->pooled_mem_size); +} + +static inline size_t conti_get_avail(struct conti_mem_allocator *a) +{ + return atomic64_read(&a->pooled_mem_size) - atomic64_read(&a->used_mem_size); +} + +int conti_mem_allocator_init(struct conti_mem_allocator *allocator, int nid, size_t granu, + const struct conti_mempool_ops *ops, const char *fmt, ...); +void conti_mem_allocator_deinit(struct conti_mem_allocator *allocator); + +void conti_free_memory(struct conti_mem_allocator *allocator, struct list_head *head); + +size_t conti_alloc_memory(struct conti_mem_allocator *allocator, size_t size, + struct list_head *head, bool zero, bool allow_slow); + +size_t conti_mem_allocator_expand(struct conti_mem_allocator *allocator, size_t size); + +size_t conti_mem_allocator_contract(struct conti_mem_allocator *allocator, size_t size); + +bool conti_mem_allocator_isolate_memseg(struct conti_mem_allocator *allocator, unsigned long addr); + +#endif diff --git a/drivers/ub/obmm/obmm_addr_check.c 
b/drivers/ub/obmm/obmm_addr_check.c new file mode 100644 index 0000000000000000000000000000000000000000..4619dd9e6c6b01785aae269c813624615d153992 --- /dev/null +++ b/drivers/ub/obmm/obmm_addr_check.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ + +#define pr_fmt(fmt) "OBMM: addr_check:" fmt + +#include +#include +#include +#include +#include + +#include "obmm_addr_check.h" + +struct pa_checker { + spinlock_t lock; + struct maple_tree pa_ranges; +}; +static struct pa_checker g_pa_checker; + +static bool is_same_pa_range(const struct obmm_pa_range *l, const struct obmm_pa_range *r) +{ + return l->start == r->start && l->end == r->end; +} + +int occupy_pa_range(const struct obmm_pa_range *pa_range) +{ + int ret; + void *persist_info; + unsigned long flags; + + persist_info = kmemdup(pa_range, sizeof(*pa_range), GFP_KERNEL); + if (persist_info == NULL) + return -ENOMEM; + + spin_lock_irqsave(&g_pa_checker.lock, flags); + ret = mtree_insert_range(&g_pa_checker.pa_ranges, (unsigned long)pa_range->start, + (unsigned long)pa_range->end, persist_info, GFP_ATOMIC); + spin_unlock_irqrestore(&g_pa_checker.lock, flags); + + if (ret != 0) { + kfree(persist_info); + pr_err("failed to occupy PA range: ret=%pe\n", ERR_PTR(ret)); + return ret; + } + return 0; +} + +int free_pa_range(const struct obmm_pa_range *pa_range) +{ + int ret; + const char *user; + void *entry; + unsigned long flags; + + spin_lock_irqsave(&g_pa_checker.lock, flags); + entry = mtree_erase(&g_pa_checker.pa_ranges, (unsigned long)pa_range->start); + spin_unlock_irqrestore(&g_pa_checker.lock, flags); + if (!entry) { + pr_err("PA range to be freed not found.\n"); + return -EFAULT; + } + ret = 0; + if (!is_same_pa_range((const struct obmm_pa_range *)entry, pa_range)) { + /* expected to be UNREACHABLE */ + pr_err("BUG: PA range to be freed does not fully match.\n"); + ret = -ENOTRECOVERABLE; + } + user = ((struct obmm_pa_range *)entry)->info.user == OBMM_ADDR_USER_DIRECT_IMPORT ? 
+ "import" : "preimport"; + kfree(entry); + return ret; +} + +int query_pa_range(phys_addr_t addr, struct obmm_addr_info *info) +{ + unsigned long index, flags; + const struct obmm_pa_range *retrieved; + + if (info == NULL) + return -EINVAL; + + index = (unsigned long)addr; + spin_lock_irqsave(&g_pa_checker.lock, flags); + retrieved = (const struct obmm_pa_range *)mt_find(&g_pa_checker.pa_ranges, &index, index); + if (retrieved) { + info->user = retrieved->info.user; + info->data = retrieved->info.data; + } + spin_unlock_irqrestore(&g_pa_checker.lock, flags); + + if (!retrieved) + return -EFAULT; + return 0; +} + +int update_pa_range(phys_addr_t addr, const struct obmm_addr_info *info) +{ + unsigned long index, flags; + struct obmm_pa_range *retrieved; + + if (info == NULL) + return -EINVAL; + + index = (unsigned long)addr; + spin_lock_irqsave(&g_pa_checker.lock, flags); + retrieved = (struct obmm_pa_range *)mt_find(&g_pa_checker.pa_ranges, &index, index); + if (retrieved) { + retrieved->info.user = info->user; + retrieved->info.data = info->data; + } + spin_unlock_irqrestore(&g_pa_checker.lock, flags); + + if (!retrieved) + return -EFAULT; + return 0; +} + +void module_addr_check_init(void) +{ + mt_init(&g_pa_checker.pa_ranges); + spin_lock_init(&g_pa_checker.lock); +} +void module_addr_check_exit(void) +{ + mtree_destroy(&g_pa_checker.pa_ranges); +} diff --git a/drivers/ub/obmm/obmm_addr_check.h b/drivers/ub/obmm/obmm_addr_check.h new file mode 100644 index 0000000000000000000000000000000000000000..d68586cad455147e2e91454bffc83690028297fc --- /dev/null +++ b/drivers/ub/obmm/obmm_addr_check.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ +#ifndef OBMM_ADDR_DUP_CHECK_H +#define OBMM_ADDR_DUP_CHECK_H + +#include + +enum obmm_addr_user { + OBMM_ADDR_USER_DIRECT_IMPORT, + OBMM_ADDR_USER_PREIMPORT, +}; +struct obmm_addr_info { + enum obmm_addr_user user; + void *data; +}; + +struct obmm_pa_range { + phys_addr_t start; + phys_addr_t end; + struct obmm_addr_info info; +}; + +int occupy_pa_range(const struct obmm_pa_range *pa_range); +int free_pa_range(const struct obmm_pa_range *pa_range); + +/* @addr is the search key and @info stores output value */ +int query_pa_range(phys_addr_t addr, struct obmm_addr_info *info); +/* @addr is the search key and @info stores the overwrite value */ +int update_pa_range(phys_addr_t addr, const struct obmm_addr_info *info); + +void module_addr_check_init(void); +void module_addr_check_exit(void); + +#endif diff --git a/drivers/ub/obmm/obmm_cache.c b/drivers/ub/obmm/obmm_cache.c new file mode 100644 index 0000000000000000000000000000000000000000..1909da83de4c6b1e657df322484c52f17c4a1624 --- /dev/null +++ b/drivers/ub/obmm/obmm_cache.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2025. All rights reserved. 
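+ * Description: cache and write-queue maintenance helpers for OBMM regions:
+ *              HiSilicon SoC cache maintenance, UB write-queue drain, TLB
+ *              flushing and page-table attribute updates.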
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "obmm_core.h" +#include "obmm_export_region_ops.h" +#include "obmm_import.h" +#include "obmm_cache.h" + +static bool skip_cache_maintain; +module_param(skip_cache_maintain, bool, 0444); +MODULE_PARM_DESC(skip_cache_maintain, + "Whether to skip cache maintain operation (to suppress errors in simulations)."); + +static bool is_valid_cache_ops(unsigned long cache_ops) +{ + return cache_ops == OBMM_SHM_CACHE_NONE || cache_ops == OBMM_SHM_CACHE_INVAL || + cache_ops == OBMM_SHM_CACHE_WB_ONLY || cache_ops == OBMM_SHM_CACHE_WB_INVAL; +} + +#define UB_MEM_DRAIN_TMOUT_MSEC 1000 + +int ub_write_queue_flush(uint32_t scna) +{ + unsigned long ub_mem_drain_timeout = jiffies + msecs_to_jiffies(UB_MEM_DRAIN_TMOUT_MSEC); + + pr_debug("call external: ub_mem_drain(scna=%#x)\n", scna); + + ub_mem_drain_start(scna); + while (!ub_mem_drain_state(scna)) { + if (time_after(jiffies, ub_mem_drain_timeout)) { + pr_err("ub_mem_drain not completed within %d msecs\n", + UB_MEM_DRAIN_TMOUT_MSEC); + return -ETIMEDOUT; + } + cpu_relax(); + } + + pr_debug("external called: ub_mem_drain\n"); + return 0; +} + +#define MAX_FLUSH_SIZE (1UL << 30) +/* the flush_cache_by_pa will yield CPU */ +#define MAX_RESCHED_ROUND 10 +#define CACHE_FLUSH_RETRY_MS 10 +int flush_cache_by_pa(phys_addr_t addr, size_t size, unsigned long cache_ops) +{ + static DEFINE_SEMAPHORE(sem, 1); + static const enum hisi_soc_cache_maint_type hisi_maint_type[] = { + /* OBMM_SHM_CACHE_NONE does not have a maintenance type */ + [OBMM_SHM_CACHE_NONE] = HISI_CACHE_MAINT_MAX, + [OBMM_SHM_CACHE_INVAL] = HISI_CACHE_MAINT_MAKEINVALID, + [OBMM_SHM_CACHE_WB_INVAL] = HISI_CACHE_MAINT_CLEANINVALID, + [OBMM_SHM_CACHE_WB_ONLY] = HISI_CACHE_MAINT_CLEANSHARED, + }; + + phys_addr_t curr_addr = addr; + size_t remain_size = size; + int ret = 0, round_to_resched = MAX_RESCHED_ROUND; + enum hisi_soc_cache_maint_type maint_type = hisi_maint_type[cache_ops]; + + if (skip_cache_maintain) { + pr_debug_ratelimited("cache maintenance request {cache_ops=%lu}.\n", cache_ops); + return 0; + } + + if (!is_valid_cache_ops(cache_ops)) { + pr_err("invalid cache_ops %lu.\n", cache_ops); + return -EINVAL; + } + + down(&sem); + while (remain_size != 0) { + size_t flush_size; + + flush_size = remain_size <= MAX_FLUSH_SIZE ? remain_size : MAX_FLUSH_SIZE; + + /* retry if there is contention over hardware */ + while (true) { + pr_debug("call external: hisi_soc_cache_maintain(0x%llx, 0x%zx, %u)\n", + curr_addr, flush_size, maint_type); + ret = hisi_soc_cache_maintain(curr_addr, flush_size, maint_type); + pr_debug("external called: hisi_soc_cache_maintain(), ret=%pe\n", + ERR_PTR(ret)); + + if (ret != -EBUSY) + break; + pr_warn_once("Racing access of cache flushing hardware identified. 
The performance of UB memory may significantly degrade.\n"); + msleep(CACHE_FLUSH_RETRY_MS); + } + if (ret) + break; + + curr_addr += flush_size; + remain_size -= flush_size; + if (--round_to_resched == 0) { + cond_resched(); + round_to_resched = MAX_RESCHED_ROUND; + } + } + up(&sem); + + if (remain_size != 0) + pr_warn("%s: 0x%zx@0x%llx not flushed due to unexpected error; ret=%pe.\n", + __func__, remain_size, curr_addr, ERR_PTR(ret)); + + return ret; +} + +int obmm_region_flush_range(struct obmm_region *reg, unsigned long offset, unsigned long length, + uint8_t cache_ops) +{ + int ret; + struct obmm_import_region *i_reg; + struct obmm_export_region *e_reg; + + /* validation */ + if (!is_valid_cache_ops(cache_ops)) { + pr_err("invalid cache operation %u\n", cache_ops); + return -EINVAL; + } + if (offset >= reg->mem_size || length > reg->mem_size - offset || + !IS_ALIGNED(offset, PAGE_SIZE) || !IS_ALIGNED(length, PAGE_SIZE)) { + pr_err("invalid flush range for region=%d: offset=0x%lx, flush_length=0x%lx, region_length=0x%llx\n", + reg->regionid, offset, length, reg->mem_size); + return -EINVAL; + } + + if (cache_ops == OBMM_SHM_CACHE_NONE) + return 0; + pr_debug("flush cache: region=%d, offset=0x%lx, length=0x%lx, cache_ops=%u\n", + reg->regionid, offset, length, cache_ops); + /* clear cache and ubus queue */ + if (reg->type == OBMM_IMPORT_REGION) { + i_reg = container_of(reg, struct obmm_import_region, region); + ret = flush_import_region(i_reg, offset, length, cache_ops); + } else { + e_reg = container_of(reg, struct obmm_export_region, region); + ret = flush_export_region(e_reg, offset, length, cache_ops); + } + + if (ret) + pr_err("flush failed: region=%d, offset=0x%lx, length=0x%lx, cache_ops=%u\n", + reg->regionid, offset, length, cache_ops); + else + pr_debug("cache successfully flushed.\n"); + return ret; +} + +/* flush the entire process address space */ +void obmm_flush_tlb(struct mm_struct *mm) +{ + unsigned long asid; + + dsb(ishst); + asid = __TLBI_VADDR(0, ASID(mm)); + __tlbi(aside1is, asid); + __tlbi_user(aside1is, asid); + dsb(ish); +} + +struct modify_info { + int pmd_cnt; + int pte_cnt; + int pmd_leaf_cnt; + int hugetlb_cnt; + bool cacheable; +}; + +static int modify_hugetlb_prot(pte_t *pte, unsigned long hmask __always_unused, + unsigned long addr __always_unused, + unsigned long next __always_unused, struct mm_walk *walk) +{ + struct modify_info *info = (struct modify_info *)walk->private; + bool cacheable = info->cacheable; + struct vm_area_struct *vma = walk->vma; + spinlock_t *ptl; + pgprot_t prot; + pte_t entry; + + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); + entry = ptep_get(pte); + if (unlikely(!pte_present(entry))) { + pr_warn("%s: addr = 0x%lx, pte not present\n", __func__, addr); + spin_unlock(ptl); + return 0; + } + + info->hugetlb_cnt++; + + prot = cacheable ? 
pgprot_tagged(pte_pgprot(entry)) : + pgprot_writecombine(pte_pgprot(entry)); + entry = pte_modify(entry, prot); + __set_pte(pte, entry); + + spin_unlock(ptl); + return 0; +} + +int modify_pgtable_prot(struct mm_struct *mm, void *va, size_t size, bool cacheable) +{ + struct modify_info info = { 0 }; + struct mm_walk_ops walk_ops = { + .hugetlb_entry = modify_hugetlb_prot, + }; + + info.cacheable = cacheable; + unsigned long start = (uintptr_t)va; + unsigned long end = start + size; + + mmap_read_lock(mm); + walk_page_range(mm, start, end, &walk_ops, &info); + mmap_read_unlock(mm); + obmm_flush_tlb(mm); + + return 0; +} diff --git a/drivers/ub/obmm/obmm_cache.h b/drivers/ub/obmm/obmm_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..28da446e0666cb2488f98057bfcad71c5bee55a6 --- /dev/null +++ b/drivers/ub/obmm/obmm_cache.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ +#ifndef OBMM_CACHE_H +#define OBMM_CACHE_H + +#include +#include "obmm_core.h" + +int ub_write_queue_flush(uint32_t scna); + +/* This function serializes all cache flush request issued by OBMM to avoid + * hardware resource contention + */ +int flush_cache_by_pa(phys_addr_t addr, size_t size, unsigned long cache_ops); +int obmm_region_flush_range(struct obmm_region *reg, unsigned long offset, unsigned long length, + uint8_t cache_ops); +void obmm_flush_tlb(struct mm_struct *mm); +/* Caller must guarantee that there is no concurrent modify requests made to the same va range. */ +int modify_pgtable_prot(struct mm_struct *mm, void *va, size_t size, bool cacheable); +int obmm_cache_clear(void); + +/* Defined in drivsers/soc/hisilicon, exported but not defined in their header file. */ +extern int hisi_soc_cache_maintain(phys_addr_t addr, size_t size, + enum hisi_soc_cache_maint_type maint_type); + +#endif diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c new file mode 100644 index 0000000000000000000000000000000000000000..a0a681d947bca9c748d5a50798468b008343432f --- /dev/null +++ b/drivers/ub/obmm/obmm_core.c @@ -0,0 +1,649 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "obmm_shm_dev.h" +#include "obmm_cache.h" +#include "obmm_export_region_ops.h" +#include "ubmempool_allocator.h" +#include "obmm_import.h" +#include "obmm_ownership.h" +#include "obmm_lowmem.h" +#include "obmm_preimport.h" +#include "obmm_addr_check.h" +#include "obmm_sysfs.h" +#include "obmm_export.h" +#include "obmm_core.h" + +size_t __obmm_memseg_size; + +/* + * OBMM centers around regions -- "struct obmm_region". Each region represents + * a chunk of memory. OBMM exposes its interface to user space through the + * device interface. Users may manipulate the memory region through ioctl to + * master device /dev/obmm, and access each memory region through standard file + * operations like open, close and mmap. + * + * To support remote memory access via UB, OBMM models two different types of + * regions, the export region and the import region. As the name suggests, the + * export region is physically located on this host (local), while the import + * region is physically attached to another host (remote). 
+ * + * All /dev/obmm operations are essentially region creation and deletion. + * Currently, a linked list is used to keep track of all active regions. + * + * All region device (/dev/obmm_shmdev{region_id}) operations access its own + * region only. To keep our management in accordance with Linux standard device + * file, each device file's life cycle should be decided only by its reference + * counts. Therefore, the master device cannot forcefully remove a region in + * use. This complicates concurrency control and region life cycle management. + * + * concurrency control: when region is created, the only accessor to the region + * is its creator, and there is no concurrency issues to worry about. The + * concurrent access starts when we "publish" the region on the region list. + * + * All new accessors get the pointer to the region from the region list, + * directly or indirectly. Most accessors merely read some region attributes. + * Their read-only nature simplifies concurrency control, and all we need to do + * is to guarantee that the region will not be freed by others during their + * access. This is done by the "refcnt" reference counter. Using the conditional + * atomic instructions, "refcnt" is also in charge of guarding against access + * before initialization is completed, access during destruction and double-free + * problems. + */ + +static struct obmm_ctx_info g_obmm_ctx_info; +static DEFINE_IDA(g_obmm_region_ida); + +/* Return the pointer to region only if the region is active: not in initialization or + * destruction process. + */ +struct obmm_region *try_get_obmm_region(struct obmm_region *region) +{ + if (region && refcount_inc_not_zero(®ion->refcnt)) + return region; + return NULL; +} +void put_obmm_region(struct obmm_region *region) +{ + if (region) + refcount_dec(®ion->refcnt); +} +void activate_obmm_region(struct obmm_region *region) +{ + refcount_set(®ion->refcnt, 1); +} +/* Return whether the disable is success. 
disable succeed only when the region is active and idle */ +static inline bool disable_obmm_region_get(struct obmm_region *region) +{ + return refcount_dec_if_one(®ion->refcnt); +} + +static struct obmm_region *_search_obmm_region(int regionid) +{ + struct obmm_region *region_now; + + list_for_each_entry(region_now, &g_obmm_ctx_info.regions, node) { + if (region_now->regionid == regionid) + return region_now; + } + return NULL; +} + +struct obmm_region *search_get_obmm_region(int regionid) +{ + struct obmm_region *region; + unsigned long flags; + spinlock_t *lock; + + lock = &g_obmm_ctx_info.lock; + spin_lock_irqsave(lock, flags); + region = _search_obmm_region(regionid); + region = try_get_obmm_region(region); + spin_unlock_irqrestore(lock, flags); + + return region; +} + +struct obmm_region *search_deactivate_obmm_region(int regionid) +{ + struct obmm_region *region; + unsigned long flags; + spinlock_t *lock; + bool success; + + lock = &g_obmm_ctx_info.lock; + spin_lock_irqsave(lock, flags); + region = _search_obmm_region(regionid); + success = region && disable_obmm_region_get(region); + spin_unlock_irqrestore(lock, flags); + + if (!region) { + pr_err("failed to deactivate: region with mem_id=%d not found.\n", regionid); + return ERR_PTR(-ENOENT); + } + + if (!success) { + pr_err("failed to deactivate: region %d is being used or in creation/destruction process.\n", + region->regionid); + return ERR_PTR(-EBUSY); + } + + return region; +} + +int obmm_query_by_offset(struct obmm_region *reg, unsigned long offset, + struct obmm_ext_addr *ext_addr) +{ + int ret; + struct obmm_export_region *e_reg; + struct obmm_import_region *i_reg; + + if (reg->type == OBMM_EXPORT_REGION) { + e_reg = container_of(reg, struct obmm_export_region, region); + ret = get_offset_detail_export_region(e_reg, offset, ext_addr); + } else { + i_reg = container_of(reg, struct obmm_import_region, region); + ret = get_offset_detail_import(i_reg, offset, ext_addr); + } + return ret; +} + +int obmm_query_by_pa(unsigned long pa, struct obmm_ext_addr *ext_addr) +{ + int ret = -ENOENT; + struct obmm_region *region; + unsigned long flags; + spinlock_t *lock; + + lock = &g_obmm_ctx_info.lock; + + spin_lock_irqsave(lock, flags); + list_for_each_entry(region, &g_obmm_ctx_info.regions, node) { + if (!try_get_obmm_region(region)) + continue; + if (region->type == OBMM_IMPORT_REGION) { + struct obmm_import_region *i_reg; + + i_reg = container_of(region, struct obmm_import_region, region); + ret = get_pa_detail_import(i_reg, pa, ext_addr); + } + if (region->type == OBMM_EXPORT_REGION) { + struct obmm_export_region *e_reg; + + e_reg = container_of(region, struct obmm_export_region, region); + ret = get_pa_detail_export_region(e_reg, pa, ext_addr); + } + put_obmm_region(region); + if (ret == 0) + break; + } + spin_unlock_irqrestore(lock, flags); + + if (ret) + return -ENOENT; + return 0; +} + +static int nid_to_package_id(int nid) +{ + const struct cpumask *cpumask; + int cpu; + + /* the check guard against the dynamic online / offline of local node */ + if (!is_online_local_node(nid)) + return -1; + + /* currently we cannot handle CPU-less local memory node */ + cpumask = cpumask_of_node(nid); + if (cpumask_empty(cpumask)) + return -1; + + cpu = (int)cpumask_first(cpumask); + return topology_physical_package_id(cpu); +} + +/* return -1 when any of the node is not online or is in different packages (sockets) */ +static int get_nodes_package(const nodemask_t *nodes) +{ + int nid, package_id, this_package_id; + + package_id = -1; + 
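/* all nodes in the mask must be online local nodes in one physical package */ +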
for_each_node_mask(nid, *nodes) { + this_package_id = nid_to_package_id(nid); + if (this_package_id == -1) + return -1; + if (package_id == -1) + package_id = this_package_id; + else if (package_id != this_package_id) + return -1; + } + return package_id; +} + +bool nodes_on_same_package(const nodemask_t *nodes) +{ + return get_nodes_package(nodes) != -1; +} + +bool validate_scna(u32 scna) +{ + int ret = ub_mem_get_numa_id(scna); + + if (ret < 0) { + pr_err("%#x is not a known scna, lookup ret=%pe\n", scna, ERR_PTR(ret)); + return false; + } + return true; +} + +bool validate_obmm_mem_id(__u64 mem_id) +{ + bool valid; + + valid = mem_id >= OBMM_MIN_VALID_REGIONID && mem_id <= OBMM_MAX_VALID_REGIONID; + if (!valid) + pr_err("mem_id=%llu is out of valid mem_id range.\n", mem_id); + return valid; +} + +static int insert_obmm_region(struct obmm_region *reg) +{ + struct obmm_region *region_now; + unsigned long flags; + spinlock_t *lock; + + lock = &g_obmm_ctx_info.lock; + spin_lock_irqsave(lock, flags); + + region_now = _search_obmm_region(reg->regionid); + if (region_now != NULL) { + spin_unlock_irqrestore(lock, flags); + pr_err("obmm region already exist, mem_id = %d\n", reg->regionid); + return -EEXIST; + } + + list_add(®->node, &g_obmm_ctx_info.regions); + spin_unlock_irqrestore(lock, flags); + return 0; +} + +static void remove_obmm_region(struct obmm_region *reg) +{ + unsigned long flags; + spinlock_t *lock; + + lock = &g_obmm_ctx_info.lock; + + spin_lock_irqsave(lock, flags); + + list_del(®->node); + + spin_unlock_irqrestore(lock, flags); +} + +void uninit_obmm_region(struct obmm_region *region) +{ + if (region->ownership_info) + release_ownership_info(region); + ida_free(&g_obmm_region_ida, region->regionid); + mutex_destroy(®ion->state_mutex); +} + +int init_obmm_region(struct obmm_region *region) +{ + int retval; + + refcount_set(®ion->refcnt, 0); + mutex_init(®ion->state_mutex); + INIT_LIST_HEAD(®ion->node); + + retval = ida_alloc_range(&g_obmm_region_ida, OBMM_MIN_VALID_REGIONID, + OBMM_MAX_VALID_REGIONID, GFP_KERNEL); + if (retval < 0) { + pr_err("Failed to allocate mem_id, ret=%pe\n", ERR_PTR(retval)); + return retval; + } + region->regionid = retval; + + return 0; +} + +int register_obmm_region(struct obmm_region *region) +{ + int retval; + + /* create device */ + retval = obmm_shm_dev_add(region); + if (retval) { + pr_err("Failed to create device %d. ret=%pe\n", region->regionid, ERR_PTR(retval)); + return retval; + } + + /* insert OBMM_region */ + retval = insert_obmm_region(region); + if (retval < 0) { + pr_err("Failed to insert obmm region %d on creation. 
ret=%pe\n", region->regionid, + ERR_PTR(retval)); + obmm_shm_dev_del(region); + return retval; + } + + return 0; +} + +void deregister_obmm_region(struct obmm_region *region) +{ + remove_obmm_region(region); + obmm_shm_dev_del(region); +} + +int set_obmm_region_priv(struct obmm_region *region, unsigned int priv_len, const void __user *priv) +{ + region->priv_len = 0; + if (priv_len > OBMM_MAX_PRIV_LEN) { + pr_err("priv_len=%u too large (limit=%u).\n", priv_len, OBMM_MAX_PRIV_LEN); + return -EINVAL; + } + + if (copy_from_user(region->priv, priv, priv_len)) { + pr_err("failed to save private data.\n"); + return -EFAULT; + } + region->priv_len = priv_len; + return 0; +} + +static int obmm_addr_query(struct obmm_cmd_addr_query *cmd_addr_query) +{ + int ret; + struct obmm_ext_addr ext_addr; + struct obmm_region *region; + + if (cmd_addr_query->key_type == OBMM_QUERY_BY_PA) { + pr_debug("obmm_query_by_pa: pa=%#llx\n", cmd_addr_query->pa); + ret = obmm_query_by_pa(cmd_addr_query->pa, &ext_addr); + if (ret == 0) { + cmd_addr_query->mem_id = ext_addr.regionid; + cmd_addr_query->offset = ext_addr.offset; + } + return ret; + } else if (cmd_addr_query->key_type == OBMM_QUERY_BY_ID_OFFSET) { + pr_debug("obmm_query_by_id_offset: mem_id=%llu offset=%#llx\n", + cmd_addr_query->mem_id, cmd_addr_query->offset); + if (!validate_obmm_mem_id(cmd_addr_query->mem_id)) + return -ENOENT; + region = search_get_obmm_region(cmd_addr_query->mem_id); + if (region == NULL) { + pr_err("region %llu not found.\n", cmd_addr_query->mem_id); + return -ENOENT; + } + ret = obmm_query_by_offset(region, cmd_addr_query->offset, &ext_addr); + if (ret == 0) + cmd_addr_query->pa = ext_addr.pa; + put_obmm_region(region); + return ret; + } + pr_err("invalid query key type: %u.\n", cmd_addr_query->key_type); + return -EINVAL; +} + +static int obmm_dev_open(struct inode *inode __always_unused, struct file *file __always_unused) +{ + return 0; +} + +static int obmm_dev_flush(struct file *file __always_unused, fl_owner_t owner __always_unused) +{ + return 0; +} + +static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, unsigned long arg) +{ + int ret; + union { + struct obmm_cmd_export create; + struct obmm_cmd_import import; + struct obmm_cmd_unexport unexport; + struct obmm_cmd_unimport unimport; + struct obmm_cmd_addr_query query; + struct obmm_cmd_export_pid export_pid; + struct obmm_cmd_preimport preimport; + } cmd_param; + + switch (cmd) { + case OBMM_CMD_EXPORT: { + ret = (int)copy_from_user(&cmd_param.create, (void __user *)arg, + sizeof(struct obmm_cmd_export)); + if (ret) { + pr_err("failed to load export argument\n"); + return -EFAULT; + } + + ret = obmm_export_from_pool(&cmd_param.create); + if (ret) + return ret; + + ret = (int)copy_to_user((void __user *)arg, &cmd_param.create, + sizeof(struct obmm_cmd_export)); + if (ret) { + pr_err("failed to write export result\n"); + return -EFAULT; + } + } break; + case OBMM_CMD_IMPORT: { + ret = (int)copy_from_user(&cmd_param.import, (void __user *)arg, + sizeof(struct obmm_cmd_import)); + if (ret) { + pr_err("failed to load import argument\n"); + return -EFAULT; + } + + ret = obmm_import(&cmd_param.import); + if (ret) + return ret; + + ret = (int)copy_to_user((void __user *)arg, &cmd_param.import, + sizeof(struct obmm_cmd_import)); + if (ret) { + pr_err("failed to write import result\n"); + return -EFAULT; + } + } break; + case OBMM_CMD_UNEXPORT: { + ret = (int)copy_from_user(&cmd_param.unexport, (void __user *)arg, + sizeof(struct obmm_cmd_unexport)); + if 
(ret) { + pr_err("failed to load unexport argument\n"); + return -EFAULT; + } + + ret = obmm_unexport(&cmd_param.unexport); + } break; + case OBMM_CMD_UNIMPORT: { + ret = (int)copy_from_user(&cmd_param.unimport, (void __user *)arg, + sizeof(struct obmm_cmd_unimport)); + if (ret) { + pr_err("failed to load unimport argument\n"); + return -EFAULT; + } + + ret = obmm_unimport(&cmd_param.unimport); + } break; + case OBMM_CMD_ADDR_QUERY: { + ret = (int)copy_from_user(&cmd_param.query, (void __user *)arg, + sizeof(struct obmm_cmd_addr_query)); + if (ret) { + pr_err("failed to load addr_query argument\n"); + return -EFAULT; + } + + ret = obmm_addr_query(&cmd_param.query); + if (ret) + return ret; + + ret = (int)copy_to_user((void __user *)arg, &cmd_param.query, + sizeof(struct obmm_cmd_addr_query)); + if (ret) { + pr_err("failed to write obmm_query result\n"); + return -EFAULT; + } + } break; + case OBMM_CMD_EXPORT_PID: { + ret = (int)copy_from_user(&cmd_param.export_pid, (void __user *)arg, + sizeof(struct obmm_cmd_export_pid)); + if (ret) { + pr_err("Failed to load export_pid param.\n"); + return -EFAULT; + } + + ret = obmm_export_pid(&cmd_param.export_pid); + if (ret) + return ret; + + ret = (int)copy_to_user((void __user *)arg, &cmd_param.export_pid, + sizeof(struct obmm_cmd_export_pid)); + if (ret) { + pr_err("failed to write export_pid result.\n"); + return -EFAULT; + } + } break; + case OBMM_CMD_DECLARE_PREIMPORT: { + ret = (int)copy_from_user(&cmd_param.preimport, (void __user *)arg, + sizeof(struct obmm_cmd_preimport)); + if (ret) { + pr_err("failed to load preimport argument\n"); + return -EFAULT; + } + + ret = obmm_preimport(&cmd_param.preimport); + if (ret) + return ret; + + ret = (int)copy_to_user((void __user *)arg, &cmd_param.preimport, + sizeof(struct obmm_cmd_preimport)); + if (ret) { + pr_err("failed to write preimport result\n"); + return -EFAULT; + } + } break; + case OBMM_CMD_UNDECLARE_PREIMPORT: { + ret = (int)copy_from_user(&cmd_param.preimport, (void __user *)arg, + sizeof(struct obmm_cmd_preimport)); + if (ret) { + pr_err("failed to load preimport argument\n"); + return -EFAULT; + } + + ret = obmm_unpreimport(&cmd_param.preimport); + } break; + default: + ret = -ENOTTY; + } + + return ret; +} + +const struct file_operations obmm_dev_fops = { .owner = THIS_MODULE, + .unlocked_ioctl = obmm_dev_ioctl, + .open = obmm_dev_open, + .flush = obmm_dev_flush }; + +static struct miscdevice obmm_dev_handle = { .minor = MISC_DYNAMIC_MINOR, + .name = OBMM_DEV_NAME, + .fops = &obmm_dev_fops }; + +static int __init obmm_init(void) +{ + int ret; + + pr_info("obmm_module: init started\n"); + + ret = ubmempool_allocator_init(); + if (ret) { + pr_err("Failed to init allocator. ret=%pe\n", ERR_PTR(ret)); + return ret; + } + + ret = misc_register(&obmm_dev_handle); + if (ret) { + pr_err("Failed to register root device. ret=%pe\n", ERR_PTR(ret)); + goto out_allocator_exit; + } + + spin_lock_init(&g_obmm_ctx_info.lock); + INIT_LIST_HEAD(&g_obmm_ctx_info.regions); + + ret = obmm_shm_dev_init(); + if (ret) { + pr_err("failed to initialize obmm_shm_dev. ret=%pe\n", ERR_PTR(ret)); + goto out_misc_deregister; + } + + module_addr_check_init(); + + ret = module_preimport_init(); + if (ret) { + pr_err("failed to initialize preimport range manager. ret=%pe.\n", ERR_PTR(ret)); + goto out_addr_check_exit; + } + + ret = lowmem_notify_init(); + if (ret) { + pr_err("failed to initialize lowmem handler. 
ret=%pe\n", ERR_PTR(ret)); + goto out_module_import_exit; + } + + pr_info("obmm_module: init completed\n"); + return ret; + +out_module_import_exit: + module_preimport_exit(); +out_addr_check_exit: + module_addr_check_exit(); + obmm_shm_dev_exit(); +out_misc_deregister: + misc_deregister(&obmm_dev_handle); +out_allocator_exit: + ubmempool_allocator_exit(); + return ret; +} + +static void __exit obmm_exit(void) +{ + pr_info("obmm_module: exit started\n"); + + lowmem_notify_exit(); + module_preimport_exit(); + module_addr_check_exit(); + obmm_shm_dev_exit(); + misc_deregister(&obmm_dev_handle); + ubmempool_allocator_exit(); + + pr_info("obmm_module: exit completed\n"); +} + +module_init(obmm_init); +module_exit(obmm_exit); + +MODULE_DESCRIPTION("OBMM Framework's implementations."); +MODULE_AUTHOR("Huawei Tech. Co., Ltd."); +MODULE_LICENSE("GPL"); diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h new file mode 100644 index 0000000000000000000000000000000000000000..ea6a1f6b5a82c93d4b9abd1a550056d12fe0f6b4 --- /dev/null +++ b/drivers/ub/obmm/obmm_core.h @@ -0,0 +1,319 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + * Author: + */ + +#ifndef OBMM_CORE_H +#define OBMM_CORE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OBMM_DEV_NAME "obmm" + +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) "OBMM: " fmt + +#define EID_BYTES 16 +#define EID_FMT64 "%#llx:%#llx" +#define EID_ALIGNED_FMT64 "%#0*llx:%#0*llx" + +#define EID_ARGS64_H(eid) (*(u64 *)&(eid)[8]) +#define EID_ARGS64_L(eid) (*(u64 *)&(eid)[0]) + +extern size_t __obmm_memseg_size; +#define OBMM_MEMSEG_SIZE __obmm_memseg_size +/* + * The maximum of {OBMM_MEMSEG_SIZE, PAGE_SIZE and CACHE_MAINTAIN_GRANU}. + */ +#define OBMM_BASIC_GRANU PMD_SIZE + +#define MAX_MEMINFO_COUNT MAX_IMPORT_COUNT + +struct ubmem_resource; + +enum obmm_region_type { + OBMM_EXPORT_REGION, + OBMM_IMPORT_REGION +}; + +enum obmm_mmap_mode { + OBMM_MMAP_INIT, + OBMM_MMAP_NORMAL, + OBMM_MMAP_OSYNC +}; + +enum obmm_mmap_granu { + OBMM_MMAP_GRANU_NONE, + OBMM_MMAP_GRANU_PAGE, + OBMM_MMAP_GRANU_PMD +}; + +#define OBMM_REGION_FLAG_NUMA_REMOTE 0x1 +#define OBMM_REGION_FLAG_ALLOW_MMAP 0x2 +#define OBMM_REGION_FLAG_MEMORY_FROM_USER 0x4 +#define OBMM_REGION_FLAG_FAST_ALLOC 0x8 +#define OBMM_REGION_FLAG_PREIMPORT 0x10 + +#define OBMM_INVALID_REGIONID 0 +#define OBMM_MIN_VALID_REGIONID 1 +#define OBMM_MAX_VALID_REGIONID MINORMASK +#define OBMM_REGIONID_MAX_COUNT (OBMM_MAX_VALID_REGIONID - OBMM_MIN_VALID_REGIONID + 1) + +#define OBMM_MEM_ALLOW_CACHEABLE_MMAP 0x1 +#define OBMM_MEM_ALLOW_NONCACHEABLE_MMAP 0x2 + +/* invalidate cache **on start-up** */ +/* region models a set of memory to share across hosts: a unit of sharing. */ +struct obmm_region { + /* unique within host -- can be used as an access handle */ + int regionid; + + /* import or export */ + enum obmm_region_type type; + + unsigned long flags; + + struct cdev cdevice; + struct device device; + atomic_t device_released; + + refcount_t refcnt; + + /* the total size of all memory segments included in meminfo */ + u64 mem_size; + /* + * current mapping mode. + * init: mmap_mode = OBMM_MMAP_INIT + * cc-mmap: mmap_mode = OBMM_MMAP_NORMAL + * nc-mmap: mmap_mode = OBMM_MMAP_OSYNC + */ + enum obmm_mmap_mode mmap_mode; + /* + * the granularity of memory mapping, initially OBMM_MMAP_GRANU_NONE. 
+ * When users call mmap, the mmap granularity is determined based on + * the mmap flags and OBMM_REGION_FLAG_ALLOW_MMAP. + */ + enum obmm_mmap_granu mmap_granu; + /* + * Determines what mode the memory can be mapped with. + * OBMM_MEM_ALLOW_CACHEABLE_MMAP: Supports cacheable mapping + * OBMM_MEM_ALLOW_NONCACHEABLE_MMAP: Supports non-cacheable mapping + */ + unsigned long mem_cap; + /* number of mmap */ + unsigned long mmap_count; + + struct obmm_ownership_info *ownership_info; + /* protect ownership_info and serialize concurrent page table change requests */ + struct mutex state_mutex; + + /* regions are chained into a list for management */ + struct list_head node; + + unsigned int priv_len; + unsigned char priv[OBMM_MAX_PRIV_LEN]; +}; + +static inline bool region_numa_remote(const struct obmm_region *reg) +{ + return reg->flags & OBMM_REGION_FLAG_NUMA_REMOTE; +} +static inline bool region_allow_mmap(const struct obmm_region *reg) +{ + return reg->flags & OBMM_REGION_FLAG_ALLOW_MMAP; +} +static inline bool region_memory_from_user(const struct obmm_region *reg) +{ + return reg->flags & OBMM_REGION_FLAG_MEMORY_FROM_USER; +} +static inline bool region_preimport(const struct obmm_region *reg) +{ + return reg->flags & OBMM_REGION_FLAG_PREIMPORT; +} +static inline bool region_fast_alloc(const struct obmm_region *reg) +{ + return reg->flags & OBMM_REGION_FLAG_FAST_ALLOC; +} + +struct obmm_import_region { + struct obmm_region region; + + u32 dcna; + u32 scna; + + /* resource of the PA range */ + struct ubmem_resource *ubmem_res; + /* the resource for this region */ + struct resource *memdev_res; + u64 pa; + + /* imported NUMA node */ + int numa_id; + /* the base_dist passed in import, which in some scenario might be an ignored value. It is + * stored here make error rollback feasible. + */ + u8 base_dist; + + /* handle to manage associated preimport range */ + void *preimport_handle; + u8 deid[16]; + u8 seid[16]; +}; + +struct mem_description_pid { + int pid; + void __user *user_va; + int pinned; + u64 start_time; +}; +struct mem_description_pool { + struct list_head head[OBMM_MAX_LOCAL_NUMA_NODES]; +}; + +struct obmm_export_region { + struct obmm_region region; + + /* export region may use physical memory from NUMA node[0] to node[node_count-1] */ + unsigned int node_count; + uint64_t node_mem_size[OBMM_MAX_LOCAL_NUMA_NODES]; + + /* physical pages */ + union { + struct mem_description_pid mem_desc_pid; + struct mem_description_pool mem_desc; + }; + + /* DMA mapping */ + struct sg_table sgt; + + /* UMMU device for the tokenid */ + struct device *ummu_dev; + /* UMMU RAS event notifier */ + struct ummu_event_block *ummu_event_block; + + unsigned int tokenid; + u64 uba; + unsigned int vendor_len; + void *vendor_info; + int affinity; + u8 deid[16]; +}; + +struct obmm_datapath { + u32 scna; + u32 dcna; + const u8 *seid; + const u8 *deid; +}; + +struct obmm_ctx_info { + /* active */ + struct list_head regions; + spinlock_t lock; +}; + +void activate_obmm_region(struct obmm_region *region); +struct obmm_region *try_get_obmm_region(struct obmm_region *region); +/* Return a valid pointer or a NULL pointer. */ +struct obmm_region *search_get_obmm_region(int regionid); +/* Return a valid pointer or an error pointer, which will never be null. 
*/ +struct obmm_region *search_deactivate_obmm_region(int regionid); +void put_obmm_region(struct obmm_region *region); + +void obmm_region_flush(struct obmm_region *reg, unsigned long cache_ops); + +/* Extended information of a byte address */ +struct obmm_ext_addr { + /* OBMM related */ + enum obmm_region_type region_type; + u32 regionid; + u64 offset; + + /* UB bus related */ + u32 tid; + u64 uba; + + /* host machine related */ + s32 numa_id; + u64 pa; +}; + +/* + * Get the extended OBMM information from a PA + * + * @pa: physical address to query + * @filter: the type of regions to check against + * @ext_addr: (output) the extended information related to the @pa + * + * Return 0 on success, negative value on failure (region not found). + */ +int obmm_query_by_pa(unsigned long pa, struct obmm_ext_addr *ext_addr); +/* + * Get the extended OBMM address information of a region by offset + * + * @region: the region to query about; refcount must held before calling this + * function + * @offset: the offset within the region (UBA offset) + * @ext_addr: (output) the extended information related to @region and @offset + * + * Return 0 on success, negative value on failure (region not found). + */ +int obmm_query_by_offset(struct obmm_region *reg, unsigned long offset, + struct obmm_ext_addr *ext_addr); + +bool nodes_on_same_package(const nodemask_t *nodes); + +/* return true if scna is a registered primary CNA of a bus controller. */ +bool validate_scna(u32 scna); +/* return true if the @mem_id is within valid range. It does not guarantee that the @mem_id is + * associated with a present region. Use search_get_obmm_region if one wants to make sure that the + * @mem_id is backed by an actual memdev. + */ +bool validate_obmm_mem_id(__u64 mem_id); + +/* internal helpers */ +static inline bool is_online_local_node(int node) +{ + return node_online(node) && !numa_is_remote_node(node); +} +static inline int __maybe_unused next_online_local_node(int node) +{ + do { + node = (int)next_node(node, node_online_map); + } while (node < MAX_NUMNODES && numa_is_remote_node(node)); + return node; +} +static inline int __maybe_unused first_online_local_node(void) +{ + int node = (int)first_node(node_online_map); + + while (node < MAX_NUMNODES && numa_is_remote_node(node)) + node = (int)next_node(node, node_online_map); + return node; +} +#define for_each_online_local_node(node) \ + for ((node) = first_online_local_node(); (node) < MAX_NUMNODES; \ + (node) = next_online_local_node(node)) + +int set_obmm_region_priv(struct obmm_region *region, unsigned int priv_len, + const void __user *priv); + +int init_obmm_region(struct obmm_region *region); +void uninit_obmm_region(struct obmm_region *region); + +int register_obmm_region(struct obmm_region *region); +void deregister_obmm_region(struct obmm_region *region); + +#endif diff --git a/drivers/ub/obmm/obmm_export.c b/drivers/ub/obmm/obmm_export.c new file mode 100644 index 0000000000000000000000000000000000000000..237eb1e122f92a173fff41c1f9581a2431f8088f --- /dev/null +++ b/drivers/ub/obmm/obmm_export.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description:OBMM Framework's implementations. 
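+ *              This file implements the export-region setup/teardown shared
+ *              by the pool-based and pid-based export paths: UMMU tdev
+ *              allocation, DMA mapping, unexport rollback and vendor/private
+ *              data handling.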
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "conti_mem_allocator.h" +#include "ubmempool_allocator.h" +#include "obmm_core.h" +#include "obmm_cache.h" +#include "obmm_export.h" +#include "obmm_shm_dev.h" + +int export_flags_to_region_flags(unsigned long *region_flags, unsigned long user_flags) +{ + *region_flags = 0; + + if (user_flags & (~OBMM_EXPORT_FLAG_MASK)) + return -EINVAL; + if (user_flags & OBMM_EXPORT_FLAG_ALLOW_MMAP) + *region_flags |= OBMM_REGION_FLAG_ALLOW_MMAP; + if (user_flags & OBMM_EXPORT_FLAG_FAST) + *region_flags |= OBMM_REGION_FLAG_FAST_ALLOC; + + return 0; +} +static int fill_ummu_info(struct tdev_attr *attr, struct obmm_export_region *e_reg) +{ + tdev_attr_init(attr); + attr->name = (char *)"OBMM_TDEV"; + if (e_reg->vendor_len > 0) { + attr->priv = kmemdup(e_reg->vendor_info, e_reg->vendor_len, GFP_KERNEL); + if (!attr->priv) + return -ENOMEM; + } + attr->priv_len = e_reg->vendor_len; + return 0; +} + +static void drain_ummu_info(struct tdev_attr *attr) +{ + kfree(attr->priv); +} + +static int setup_ummu(struct obmm_export_region *e_reg) +{ + struct tdev_attr attr; + uint32_t tokenid = UMMU_INVALID_TID; + int retval; + + retval = fill_ummu_info(&attr, e_reg); + if (retval) + return retval; + + /* register the memory region through UMMU */ + pr_info("call ummu_core_alloc_tdev(), priv_len=%u, tid=%u\n", attr.priv_len, tokenid); + e_reg->ummu_dev = ummu_core_alloc_tdev(&attr, &tokenid); + if (e_reg->ummu_dev == NULL) { + pr_err("Failed to create UMMU device\n"); + retval = -EPERM; + goto out_drain_info; + } + e_reg->tokenid = tokenid; + pr_debug("ummu_core_alloc_tdev() returned ummu_dev: tid=%u, name=%s\n", tokenid, + dev_name(e_reg->ummu_dev)); + + /* DMA mapping */ + pr_info("call dma_map_sgtable(..., dir=DMA_BIDIRECTIONAL, attrs=0)\n"); + retval = dma_map_sgtable(e_reg->ummu_dev, &e_reg->sgt, DMA_BIDIRECTIONAL, 0); + if (retval) { + pr_err("Failed to map sgtable on UMMU. ret=%pe\n", ERR_PTR(retval)); + goto out_free_device; + } + pr_debug("dma_map_sgtable returned 0\n"); + + e_reg->uba = sg_dma_address(e_reg->sgt.sgl); + drain_ummu_info(&attr); + return 0; + +out_free_device: + if (ummu_core_free_tdev(e_reg->ummu_dev)) + pr_warn("Failed to create memory region but unable to cleanup allocated UMMU device\n"); +out_drain_info: + drain_ummu_info(&attr); + return retval; +} + +static int teardown_ummu(struct obmm_export_region *e_reg) +{ + int ret, rollback_ret; + + pr_debug("call external: dma_unmap_sgtable\n"); + dma_unmap_sgtable(e_reg->ummu_dev, &e_reg->sgt, DMA_BIDIRECTIONAL, 0); + + pr_debug("call external: ummu_core_free_tdev()\n"); + ret = ummu_core_free_tdev(e_reg->ummu_dev); + if (ret) { + pr_err("Failed to free UMMU tdev, ret=%pe.\n", ERR_PTR(ret)); + goto err_free_tdev; + } + + return 0; + +err_free_tdev: + + rollback_ret = dma_map_sgtable(e_reg->ummu_dev, &e_reg->sgt, DMA_BIDIRECTIONAL, 0); + if (rollback_ret) { + pr_err("Failed to map sgtable on UMMU. ret=%pe\n", ERR_PTR(rollback_ret)); + ret = -ENOTRECOVERABLE; + } + if (e_reg->uba != sg_dma_address(e_reg->sgt.sgl)) { + pr_err("Tried remapping in UMMU on rollback but UBA changed.\n"); + ret = -ENOTRECOVERABLE; + pr_debug("call external: dma_unmap_sgtable\n"); + dma_unmap_sgtable(e_reg->ummu_dev, &e_reg->sgt, DMA_BIDIRECTIONAL, 0); + } + return ret; +} + +/* Make sure the memory to be exported is in properly allocated and ready to be mapped by UMMU. 
+ * The detailed information of the memory should be put in place in e_reg->sgt + */ +static int alloc_export_memory(struct obmm_export_region *e_reg) +{ + if (region_memory_from_user(&e_reg->region)) + return alloc_export_memory_pid(e_reg); + else + return alloc_export_memory_pool(e_reg); +} + +static void free_export_memory_pool(struct obmm_export_region *e_reg) +{ + sg_free_table(&e_reg->sgt); + free_memory_contiguous(&e_reg->mem_desc); +} + +static void free_export_memory(struct obmm_export_region *e_reg) +{ + if (region_memory_from_user(&e_reg->region)) + free_export_memory_pid(e_reg); + else + free_export_memory_pool(e_reg); +} + +/* Ensure all user inputs are properly converted and filled into the region. */ +int obmm_export_common(struct obmm_export_region *e_reg) +{ + int ret; + + ret = alloc_export_memory(e_reg); + if (ret) + return ret; + + ret = setup_ummu(e_reg); + if (ret) + goto free_memory; + + return 0; + +free_memory: + free_export_memory(e_reg); + + return ret; +} + +int obmm_unexport_common(struct obmm_export_region *e_reg) +{ + int ret; + + ret = teardown_ummu(e_reg); + if (ret) + return ret; + free_export_memory(e_reg); + + return 0; +} + +/* NOTE: the operation order is not precisely the reverse order of initialization for the ease of + * error rollback. Please make careful evaluation on modifications. + */ +int obmm_unexport(const struct obmm_cmd_unexport *cmd_unexport) +{ + int ret; + struct obmm_region *reg; + struct obmm_export_region *e_reg; + + pr_info("%s: mem_id=%llu, flags=%#llx.\n", __func__, cmd_unexport->mem_id, + cmd_unexport->flags); + if (!validate_obmm_mem_id(cmd_unexport->mem_id)) + return -ENOENT; + if (cmd_unexport->flags & (~OBMM_UNEXPORT_FLAG_MASK)) { + pr_err("%s: invalid flags %#llx.\n", __func__, cmd_unexport->flags); + return -EINVAL; + } + + reg = search_deactivate_obmm_region(cmd_unexport->mem_id); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + if (reg->type != OBMM_EXPORT_REGION) { + pr_err("%s: mem_id=%llu region type mismatched.\n", __func__, cmd_unexport->mem_id); + ret = -EINVAL; + goto err_unexport_common; + } + + e_reg = container_of(reg, struct obmm_export_region, region); + ret = obmm_unexport_common(e_reg); + if (ret) + goto err_unexport_common; + + deregister_obmm_region(reg); + uninit_obmm_region(reg); + free_export_region(e_reg); + + pr_info("%s: mem_id=%llu completed.\n", __func__, cmd_unexport->mem_id); + return 0; + +err_unexport_common: + activate_obmm_region(reg); + pr_err("%s: mem_id=%llu failed, %pe.\n", __func__, cmd_unexport->mem_id, ERR_PTR(ret)); + + return ret; +} + +int set_export_vendor(struct obmm_export_region *e_reg, const void __user *vendor_info, + unsigned int vendor_len) +{ + if (vendor_len == 0) { + e_reg->vendor_info = NULL; + e_reg->vendor_len = vendor_len; + return 0; + } + if (vendor_len > OBMM_MAX_VENDOR_LEN) { + pr_err("invalid vendor_len = 0x%x, should less than 0x%x\n", vendor_len, + OBMM_MAX_VENDOR_LEN); + return -EINVAL; + } + e_reg->vendor_info = kmalloc(vendor_len, GFP_KERNEL); + if (!e_reg->vendor_info) + return -ENOMEM; + + if (copy_from_user(e_reg->vendor_info, vendor_info, vendor_len)) { + kfree(e_reg->vendor_info); + e_reg->vendor_info = NULL; + pr_err("failed to save vendor data.\n"); + return -EFAULT; + } + e_reg->vendor_len = vendor_len; + return 0; +} + +void free_export_region(struct obmm_export_region *e_reg) +{ + wait_until_dev_released(&e_reg->region); + if (e_reg->vendor_len) + kfree(e_reg->vendor_info); + + kfree(e_reg); +} diff --git a/drivers/ub/obmm/obmm_export.h 
b/drivers/ub/obmm/obmm_export.h new file mode 100644 index 0000000000000000000000000000000000000000..68b6ab2b0e86a6a9d2b78dcaecc916557516151b --- /dev/null +++ b/drivers/ub/obmm/obmm_export.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#ifndef OBMM_EXPORT_C_H +#define OBMM_EXPORT_C_H +int obmm_export_common(struct obmm_export_region *e_reg); + +int export_flags_to_region_flags(unsigned long *region_flags, unsigned long user_flags); + +int alloc_export_memory_pid(struct obmm_export_region *e_reg); +void free_export_memory_pid(struct obmm_export_region *e_reg); +int alloc_export_memory_pool(struct obmm_export_region *e_reg); +int obmm_unexport_common(struct obmm_export_region *e_reg); +int obmm_export_from_pool(struct obmm_cmd_export *cmd_export); +int obmm_export_pid(struct obmm_cmd_export_pid *export_pid); +int obmm_unexport(const struct obmm_cmd_unexport *cmd_unexport); + +int set_export_vendor(struct obmm_export_region *e_reg, const void __user *vendor_info, + unsigned int vendor_len); +void free_export_region(struct obmm_export_region *e_reg); +#endif diff --git a/drivers/ub/obmm/obmm_export_from_pool.c b/drivers/ub/obmm/obmm_export_from_pool.c new file mode 100644 index 0000000000000000000000000000000000000000..daa4214955da9e615638a48f97829fba4d3dbf8c --- /dev/null +++ b/drivers/ub/obmm/obmm_export_from_pool.c @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ubmempool_allocator.h" +#include "conti_mem_allocator.h" +#include "obmm_export.h" + +/* SGL size is specified as an unsigned int. It's best to limit the size of single SGL + * no larger than (1 << MAX_CHUNK_SHIFT) + */ +#define MAX_CHUNK_SHIFT (31) +#define MAX_CHUNK_SIZE (1U << MAX_CHUNK_SHIFT) +#define MAX_CHUNK_MASK (MAX_CHUNK_SIZE - 1) + +static unsigned long size_to_chunk_count(size_t size) +{ + return (size >> MAX_CHUNK_SHIFT) + (unsigned long)((size & MAX_CHUNK_MASK) != 0); +} + +static unsigned long memseg_list_to_chunk_count(struct list_head *head) +{ + struct memseg_node *node; + phys_addr_t start = 0, end = 0; + unsigned long chunk_count = 0; + + list_for_each_entry(node, head, list) { + /* whether the new node follows previous ones */ + if (end == node->addr) { + end += OBMM_MEMSEG_SIZE; + continue; + } + chunk_count += size_to_chunk_count(end - start); + + start = node->addr; + end = node->addr + OBMM_MEMSEG_SIZE; + } + chunk_count += size_to_chunk_count(end - start); + return chunk_count; +} + +static struct scatterlist *fill_sg_chunks(struct scatterlist *s, phys_addr_t start, size_t size, + unsigned long *filled_chunks) +{ + size_t chunk_size; + unsigned long num_chunks_to_fill; + + *filled_chunks = 0; + num_chunks_to_fill = size_to_chunk_count(size); + while (num_chunks_to_fill--) { + if (s == NULL) { + /* this error is not expected to show up in release version, thus proper + * error handling is not included + */ + pr_warn_once("bug: scatterlist is not big enough.\n"); + return s; + } + chunk_size = size > MAX_CHUNK_SIZE ? 
MAX_CHUNK_SIZE : size; + sg_set_page(s, pfn_to_page(start >> PAGE_SHIFT), chunk_size, 0); + s = sg_next(s); + + start += chunk_size; + size -= chunk_size; + *filled_chunks += 1; + } + return s; +} +/* Return the number of chunks to fill in the scatterlist. If @sg is NULL, the + * function performs a dry run. + */ +static struct scatterlist *fill_sg_list(struct scatterlist *s, struct list_head *head, + unsigned long *filled_chunks) +{ + struct memseg_node *node; + phys_addr_t start = 0, end = 0; + unsigned long chunk_count; + + *filled_chunks = 0; + list_for_each_entry(node, head, list) { + /* whether the new node follows previous ones */ + if (end == node->addr) { + end += OBMM_MEMSEG_SIZE; + continue; + } + + if (end != 0) { + s = fill_sg_chunks(s, start, end - start, &chunk_count); + *filled_chunks += chunk_count; + } + + /* track the first piece of new chunk */ + start = node->addr; + end = node->addr + OBMM_MEMSEG_SIZE; + } + + if (end != 0) { + s = fill_sg_chunks(s, start, end - start, &chunk_count); + *filled_chunks += chunk_count; + } + + return s; +} + +static int sg_alloc_table_from_memdesc(struct sg_table *sgt, struct mem_description_pool *desc, + gfp_t gfp_mask) +{ + unsigned long chunk_count, total_chunks, filled_chunks; + struct scatterlist *s; + int ret, i; + + total_chunks = 0; + for (i = 0; i < OBMM_MAX_LOCAL_NUMA_NODES; i++) + total_chunks += memseg_list_to_chunk_count(&desc->head[i]); + if (total_chunks == 0) { + pr_err("%s: no memory.\n", __func__); + return -EINVAL; + } + + ret = sg_alloc_table(sgt, total_chunks, gfp_mask); + if (ret) { + pr_err("alloc sgt failed.\n"); + return ret; + } + + s = sgt->sgl; + filled_chunks = 0; + for (i = 0; i < OBMM_MAX_LOCAL_NUMA_NODES; i++) { + s = fill_sg_list(s, &desc->head[i], &chunk_count); + filled_chunks += chunk_count; + } + + if (filled_chunks != total_chunks || s != NULL) { + pr_err("%s: internal error.\n", __func__); + ret = -ENOTRECOVERABLE; + goto sg_err; + } + return 0; + +sg_err: + sg_free_table(sgt); + + return ret; +} + +int alloc_export_memory_pool(struct obmm_export_region *e_reg) +{ + int ret; + unsigned int i; + struct mem_description_pool *desc; + bool allow_slow = !region_fast_alloc(&e_reg->region); + + for (i = 0; i < e_reg->node_count; i++) { + if (e_reg->node_mem_size[i] == 0) + continue; + if (e_reg->node_mem_size[i] % OBMM_MEMSEG_SIZE) { + pr_err("invalid size 0x%llx on node %d: not aligned to mempool granu %#lx\n", + e_reg->node_mem_size[i], i, OBMM_MEMSEG_SIZE); + return -EINVAL; + } + } + + pr_debug("export_from_pool: allocation started.\n"); + desc = &e_reg->mem_desc; + ret = allocate_memory_contiguous(e_reg->node_mem_size, e_reg->node_count, desc, true, + allow_slow); + if (ret) + return ret; + pr_debug("export_from_pool: allocation completed. 
sgtable preparation started.\n"); + + ret = sg_alloc_table_from_memdesc(&e_reg->sgt, desc, GFP_KERNEL); + if (ret) { + free_memory_contiguous(desc); + return ret; + } + pr_debug("export_from_pool: sgtable preparation completed.\n"); + + return 0; +} + +static int calculate_export_region_size(unsigned long *total_size, + struct obmm_cmd_export *cmd_export) +{ + uint64_t i; + nodemask_t nodes = NODE_MASK_NONE; + + if (cmd_export->length > OBMM_MAX_LOCAL_NUMA_NODES) { + pr_err("Size list is too long: max=%d, actual_length=%lld\n", + OBMM_MAX_LOCAL_NUMA_NODES, cmd_export->length); + return -E2BIG; + } + if (cmd_export->pxm_numa > OBMM_MAX_LOCAL_NUMA_NODES) { + pr_err("Invalid pxm_numa %d\n", cmd_export->pxm_numa); + return -EINVAL; + } + + *total_size = 0; + for (i = 0; i < cmd_export->length; i++) { + if (!IS_ALIGNED(cmd_export->size[i], OBMM_MEMSEG_SIZE)) { + pr_err("The size of new OBMM region 0x%llx on node %d is not aligned to OBMM memseg size %#lx.\n", + cmd_export->size[i], (int)i, OBMM_MEMSEG_SIZE); + return -EINVAL; + } + if (cmd_export->size[i] != 0 && !is_online_local_node(i)) { + pr_err("Cannot export memory from offlined or remote numa node %d\n", + (int)i); + return -ENODEV; + } + if (cmd_export->size[i] != 0) { + if (*total_size > *total_size + cmd_export->size[i]) { + pr_err("Memory size overflowed!\n"); + return -EOVERFLOW; + } + *total_size += cmd_export->size[i]; + node_set(i, nodes); + } + } + if (*total_size == 0) { + pr_err("The size of new OBMM region is 0. Non-zero value expected\n"); + return -EINVAL; + } + node_set(cmd_export->pxm_numa, nodes); + if (!nodes_on_same_package(&nodes)) { + pr_err("Cannot use memory from multiple sockets or memory and ub controller is from different sockets.\n"); + return -EINVAL; + } + + return 0; +} + +static struct obmm_export_region *alloc_region_from_cmd(struct obmm_cmd_export *cmd_export) +{ + struct obmm_export_region *e_reg; + unsigned long total_size; + int ret; + + ret = calculate_export_region_size(&total_size, cmd_export); + if (ret) + return ERR_PTR(ret); + + e_reg = kzalloc(sizeof(struct obmm_export_region), GFP_KERNEL); + if (e_reg == NULL) + return ERR_PTR(-ENOMEM); + + atomic_set(&e_reg->region.device_released, 1); + + e_reg->region.type = OBMM_EXPORT_REGION; + e_reg->region.mem_size = total_size; + e_reg->region.mem_cap = OBMM_MEM_ALLOW_CACHEABLE_MMAP | OBMM_MEM_ALLOW_NONCACHEABLE_MMAP; + e_reg->affinity = cmd_export->pxm_numa; + memcpy(e_reg->deid, cmd_export->deid, sizeof(e_reg->deid)); + ret = export_flags_to_region_flags(&e_reg->region.flags, cmd_export->flags); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + e_reg->node_count = cmd_export->length; + memcpy(e_reg->node_mem_size, cmd_export->size, sizeof(uint64_t) * e_reg->node_count); + /* compaction */ + while (e_reg->node_count - 1 > 0 && e_reg->node_mem_size[e_reg->node_count - 1] == 0) + e_reg->node_count--; + ret = set_obmm_region_priv(&e_reg->region, cmd_export->priv_len, cmd_export->priv); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + ret = set_export_vendor(e_reg, cmd_export->vendor_info, cmd_export->vendor_len); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + return e_reg; +} + +static void print_export_param(const struct obmm_cmd_export *cmd_export) +{ + unsigned int i; + + pr_info("obmm_export: len(sizes)=%#llx sizes={", cmd_export->length); + for (i = 0; i < cmd_export->length && i < OBMM_MAX_LOCAL_NUMA_NODES; i++) + if (cmd_export->size[i]) + pr_cont(" [%u]:%#llx", i, cmd_export->size[i]); + if (i < cmd_export->length) + 
pr_cont(" ..."); + + pr_cont(" } flags=%#llx deid=" EID_FMT64 " priv_len=%u\n", cmd_export->flags, + EID_ARGS64_H(cmd_export->deid), EID_ARGS64_L(cmd_export->deid), + cmd_export->priv_len); +} + +/* obmm_export_from_pool: create an OBMM-exported memory region. The region is + * physically located on this host and can be accessed from remote host. + * In OBMM's terminology, it is an export region. + */ +int obmm_export_from_pool(struct obmm_cmd_export *cmd_export) +{ + struct obmm_export_region *e_reg; + uint64_t uba, mem_id; + uint32_t token_id; + int ret; + + print_export_param(cmd_export); + e_reg = alloc_region_from_cmd(cmd_export); + if (IS_ERR(e_reg)) + return PTR_ERR(e_reg); + + ret = init_obmm_region(&e_reg->region); + if (ret) + goto out_free_reg; + + ret = obmm_export_common(e_reg); + if (ret) + goto out_unit_reg; + + token_id = e_reg->tokenid; + uba = e_reg->uba; + mem_id = (uint64_t)e_reg->region.regionid; + + ret = register_obmm_region(&e_reg->region); + if (ret) + goto out_unexport; + activate_obmm_region(&e_reg->region); + + cmd_export->tokenid = token_id; + cmd_export->uba = uba; + cmd_export->mem_id = mem_id; + + pr_info("obmm_export: mem_id=%llu online.\n", mem_id); + return 0; + +out_unexport: + obmm_unexport_common(e_reg); +out_unit_reg: + uninit_obmm_region(&e_reg->region); +out_free_reg: + free_export_region(e_reg); + return ret; +} diff --git a/drivers/ub/obmm/obmm_export_from_user.c b/drivers/ub/obmm/obmm_export_from_user.c new file mode 100644 index 0000000000000000000000000000000000000000..f136d3398753a3eb97e36a003c18f592b65e53d0 --- /dev/null +++ b/drivers/ub/obmm/obmm_export_from_user.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description:OBMM Framework's implementations. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "obmm_core.h" +#include "obmm_cache.h" +#include "obmm_export_region_ops.h" +#include "obmm_export.h" + +static struct task_struct *get_tsk_struct(pid_t pid) +{ + struct task_struct *task; + + if (!pid) { + get_task_struct(current); + return current; + } + + rcu_read_lock(); + task = pid_task(find_pid_ns(pid, &init_pid_ns), PIDTYPE_PID); + if (task) + get_task_struct(task); + rcu_read_unlock(); + + return task; +} + +void free_export_memory_pid(struct obmm_export_region *e_reg) +{ + struct mem_description_pid *desc = &e_reg->mem_desc_pid; + struct mm_struct *mm = NULL; + struct task_struct *tsk; + struct scatterlist *sg; + unsigned int i; + + WARN_ON(desc->pid == 0); + tsk = get_tsk_struct(desc->pid); + if (tsk && tsk->start_time != desc->start_time) { + /* if the process is still alive or its PID has not been reused */ + pr_err("pid(%d) is reused.\n", desc->pid); + put_task_struct(tsk); + tsk = NULL; + } + + if (tsk) + mm = get_task_mm(tsk); + + if (mm) { + atomic64_sub(desc->pinned, &mm->pinned_vm); + WARN_ON(modify_pgtable_prot(mm, desc->user_va, e_reg->region.mem_size, true)); + mmput(mm); + } + + if (tsk) + put_task_struct(tsk); + + WARN_ON(kernel_pgtable_set_export_invalid(e_reg, 0, e_reg->region.mem_size, false)); + + /* unpin all pages from sgt */ + for_each_sgtable_sg(&e_reg->sgt, sg, i) + unpin_user_page_range_dirty_lock(sg_page(sg), DIV_ROUND_UP(sg->length, PAGE_SIZE), + true); + + sg_free_table(&e_reg->sgt); +} + +static bool hisi_workarounds_check_page_list(struct obmm_export_region *reg, struct page **pages, + int count) +{ + nodemask_t node_mask; + unsigned int node; + int i, nid; + + nodes_clear(node_mask); + for (i = 0; i < count; i++) { + struct page *p = pages[i]; + + if (!PageHuge(p)) { + pr_err("Only hugetlbfs pages are allowed\n"); + return false; + } + +#ifdef CONFIG_NUMA + nid = page_to_nid(p); +#else + nid = 0; +#endif + if (nid < 0 || nid >= OBMM_MAX_LOCAL_NUMA_NODES) { + pr_err("Invalid node ID %d.\n", nid); + return false; + } + + node_set(nid, node_mask); + reg->node_mem_size[nid] += PAGE_SIZE; + } + + for_each_node_mask(node, node_mask) { + pr_debug("Page resides in node %u\n", node); + reg->node_count = node + 1; + } + if (reg->affinity > OBMM_MAX_LOCAL_NUMA_NODES) { + pr_err("Invalid pxm_numa %d\n", reg->affinity); + return false; + } + node_set(reg->affinity, node_mask); + + return nodes_on_same_package(&node_mask); +} + +int alloc_export_memory_pid(struct obmm_export_region *e_reg) +{ + unsigned long new_pinned, nrpages; + struct mem_description_pid *desc = &e_reg->mem_desc_pid; + struct page **page_list; + struct task_struct *tsk; + struct mm_struct *mm; + bool remote_mm; + int pinned, ret = 0; + int locked = 0; + + nrpages = e_reg->region.mem_size >> PAGE_SHIFT; + if (!nrpages) { + pr_err("export pages must > 1\n"); + return -EINVAL; + } + + tsk = get_tsk_struct(desc->pid); + if (!tsk) { + pr_err("get tsk from pid(%d) failed.\n", desc->pid); + return -ESRCH; + } + + mm = get_task_mm(tsk); + if (!mm) { + ret = -ESRCH; + pr_err("get mm from pid(%d) failed.\n", desc->pid); + goto drop_tsk; + } + desc->start_time = tsk->start_time; + + remote_mm = mm != current->mm; + if (!remote_mm) + desc->pid = current->tgid; + + pr_debug("exporting from %s\n", remote_mm ? 
"remote" : "current"); + + new_pinned = (unsigned long)atomic64_add_return(nrpages, &mm->pinned_vm); + + page_list = kvmalloc_array(nrpages, sizeof(struct page *), GFP_KERNEL); + if (!page_list) { + ret = -ENOMEM; + goto drop_pinned; + } + + pr_debug("exporting useraddr: pid(%d) va(%p) size(%#llx)\n", desc->pid, desc->user_va, + e_reg->region.mem_size); + + mmap_read_lock(mm); + locked = 1; + if (remote_mm) { + pinned = pin_user_pages_remote(mm, (uintptr_t)desc->user_va, nrpages, FOLL_WRITE, + page_list, &locked); + } else { + pinned = pin_user_pages_fast((uintptr_t)desc->user_va, nrpages, FOLL_WRITE, + page_list); + } + if (locked) + mmap_read_unlock(mm); + + if (pinned < 0) { + pr_err("pin memory failed, %d.\n", pinned); + ret = pinned; + goto free_page_list; + } + + if (pinned != (int)nrpages) { + pr_err("failed to pin user pages(%d/%lu)!\n", pinned, nrpages); + ret = -ENOMEM; + goto free_page_list; + } + + if (!hisi_workarounds_check_page_list(e_reg, page_list, nrpages)) { + pr_err("hisi workarounds check no passing.\n"); + ret = -EOPNOTSUPP; + goto free_page_list; + } + + ret = sg_alloc_table_from_pages_segment(&e_reg->sgt, page_list, nrpages, 0, + e_reg->region.mem_size, SZ_1G, GFP_KERNEL); + if (ret) { + pr_err("alloc sg table failed, %pe.\n", ERR_PTR(ret)); + goto free_page_list; + } + + ret = kernel_pgtable_set_export_invalid(e_reg, 0, e_reg->region.mem_size, true); + if (ret) + goto out_free_sg; + + ret = modify_pgtable_prot(mm, desc->user_va, e_reg->region.mem_size, false); + if (ret) + goto out_set_kernel_cacheable; + + ret = obmm_region_flush_range(&e_reg->region, 0, e_reg->region.mem_size, + OBMM_SHM_CACHE_WB_INVAL); + if (ret) + goto out_reset_pgtable_prot; + + desc->pinned = pinned; + kvfree(page_list); /* all pages saved in scatterlist */ + mmput(mm); + put_task_struct(tsk); + pr_debug("exporting memory prepared.\n"); + + return 0; + +out_reset_pgtable_prot: + WARN_ON(modify_pgtable_prot(mm, desc->user_va, e_reg->region.mem_size, true)); +out_set_kernel_cacheable: + WARN_ON(kernel_pgtable_set_export_invalid(e_reg, 0, e_reg->region.mem_size, false)); +out_free_sg: + sg_free_table(&e_reg->sgt); +free_page_list: + if (pinned > 0) + unpin_user_pages_dirty_lock(page_list, pinned, 0); + kvfree(page_list); +drop_pinned: + atomic64_sub(nrpages, &mm->pinned_vm); + mmput(mm); +drop_tsk: + put_task_struct(tsk); + return ret; +} + +static int obmm_cmd_export_pid_allowed(struct obmm_cmd_export_pid *cmd) +{ + if (cmd->flags & ~(OBMM_EXPORT_FLAG_MASK)) { + pr_err("invalid flags %#llx encountered in export_user_addr.\n", cmd->flags); + return -EINVAL; + } + if (cmd->flags & OBMM_EXPORT_FLAG_ALLOW_MMAP) { + pr_err("ALLOW_MMAP flag is not allowed in export_user_addr.\n"); + return -EINVAL; + } + if (cmd->flags & OBMM_EXPORT_FLAG_FAST) { + pr_err("FAST flag is not allowed in export_user_addr.\n"); + return -EINVAL; + } + + if (cmd->length == 0) { + pr_err("export sizeof 0 memory is not allowed.\n"); + return -EINVAL; + } + + if (cmd->length % OBMM_BASIC_GRANU) { + pr_err("export memory size is not aligned to OBMM basic granularity.\n"); + return -EINVAL; + } + + return 0; +} + +static struct obmm_export_region * +alloc_export_region_from_obmm_cmd_export_pid(const struct obmm_cmd_export_pid *export_pid) +{ + int ret; + + struct obmm_export_region *e_reg = kzalloc(sizeof(struct obmm_export_region), GFP_KERNEL); + + if (e_reg == NULL) + return ERR_PTR(-ENOMEM); + + atomic_set(&e_reg->region.device_released, 1); + + e_reg->mem_desc_pid.pid = export_pid->pid; + e_reg->mem_desc_pid.user_va = 
export_pid->va; + e_reg->region.mem_size = export_pid->length; + e_reg->region.type = OBMM_EXPORT_REGION; + e_reg->region.mem_cap = 0; + e_reg->affinity = export_pid->pxm_numa; + memcpy(e_reg->deid, export_pid->deid, sizeof(e_reg->deid)); + ret = export_flags_to_region_flags(&e_reg->region.flags, export_pid->flags); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + e_reg->region.flags |= OBMM_REGION_FLAG_MEMORY_FROM_USER; + ret = set_obmm_region_priv(&e_reg->region, export_pid->priv_len, export_pid->priv); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + ret = set_export_vendor(e_reg, export_pid->vendor_info, export_pid->vendor_len); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + return e_reg; +} + +static void print_export_pid_param(const struct obmm_cmd_export_pid *cmd_export_pid) +{ + pr_info("obmm_export_useraddr: pid=%d length=%#llx priv_len=%u deid=" + EID_FMT64 " vendor_len=%u\n", + cmd_export_pid->pid, cmd_export_pid->length, cmd_export_pid->priv_len, + EID_ARGS64_H(cmd_export_pid->deid), EID_ARGS64_L(cmd_export_pid->deid), + cmd_export_pid->vendor_len); +} + +int obmm_export_pid(struct obmm_cmd_export_pid *export_pid) +{ + struct obmm_export_region *e_reg; + uint64_t uba, mem_id; + uint32_t token_id; + int ret; + + print_export_pid_param(export_pid); + ret = obmm_cmd_export_pid_allowed(export_pid); + if (ret) + return ret; + + e_reg = alloc_export_region_from_obmm_cmd_export_pid(export_pid); + if (IS_ERR(e_reg)) + return PTR_ERR(e_reg); + + ret = init_obmm_region(&e_reg->region); + if (ret) + goto out_free_reg; + + ret = obmm_export_common(e_reg); + if (ret) + goto out_unit_reg; + + token_id = e_reg->tokenid; + uba = e_reg->uba; + mem_id = (uint64_t)e_reg->region.regionid; + + ret = register_obmm_region(&e_reg->region); + if (ret) + goto out_unexport; + activate_obmm_region(&e_reg->region); + + export_pid->tokenid = token_id; + export_pid->uba = uba; + export_pid->mem_id = mem_id; + + pr_info("obmm_export_useraddr: mem_id=%llu online.\n", mem_id); + return 0; + +out_unexport: + obmm_unexport_common(e_reg); +out_unit_reg: + uninit_obmm_region(&e_reg->region); +out_free_reg: + free_export_region(e_reg); + return ret; +} diff --git a/drivers/ub/obmm/obmm_export_region_ops.c b/drivers/ub/obmm/obmm_export_region_ops.c new file mode 100644 index 0000000000000000000000000000000000000000..b561224b660bccafd42618c97690168f96789dba --- /dev/null +++ b/drivers/ub/obmm/obmm_export_region_ops.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include "conti_mem_allocator.h" +#include "ubmempool_allocator.h" +#include "obmm_core.h" +#include "obmm_cache.h" +#include "obmm_export_region_ops.h" + +/** + * walk_fn for one obmm_export_region. + * @start: start physical address. + * @end: end physical address. + * @offset: offset of `start` address in this obmm_export_region. + * @arg: private argument. + */ +typedef int (*walk_fn)(phys_addr_t start, phys_addr_t end, unsigned long offset, void *arg); + +/* + * this function stops if walk_fn returns an error. 
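+ * When fn returns non-zero, the remaining range is skipped and the trailing
+ * "excessive length" warning is not emitted.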
+ */ +static void walk_export_memory(const struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, walk_fn fn, void *arg) +{ + struct scatterlist *sg; + /* the offset of current sg from the region's beginning */ + unsigned long offset_from_head = 0; + unsigned int i; + int ret = 0; + + if (length == 0) + return; + + for_each_sgtable_sg(&e_reg->sgt, sg, i) { + phys_addr_t start, size; + + if (offset >= sg->length) { + offset -= sg->length; + offset_from_head += sg->length; + continue; + } + + /* now the offset is an intra-sg offset */ + start = page_to_phys(sg_page(sg)) + offset; + size = min(sg->length, offset + length) - offset; + + ret = fn(start, start + size - 1, offset_from_head + offset, arg); + if (ret) + return; + + offset = 0; + length -= size; + if (length == 0) + break; + + offset_from_head += sg->length; + } + + if (ret == 0 && length != 0) + pr_warn("%s: excessive length: %#lx bytes not walked.\n", __func__, length); +} + +struct flush_cache_info { + int ret; + int succ_flush_count; + int total_flush_count; + unsigned long cache_ops; +}; + +static int flush_cache_call(phys_addr_t start, phys_addr_t end, + unsigned long offset __always_unused, void *arg) +{ + struct flush_cache_info *info = (struct flush_cache_info *)arg; + + int ret = flush_cache_by_pa(start, end - start + 1, info->cache_ops); + + if (ret == 0) + info->succ_flush_count++; + else if (info->ret == 0) + info->ret = ret; + else if (info->ret != ret) + pr_warn("multiple flush error types detected: new flush_ret=%pe.\n", ERR_PTR(ret)); + info->total_flush_count++; + + return 0; +} + +int flush_export_region(struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, unsigned long cache_ops) +{ + struct flush_cache_info info = { 0 }; + + info.cache_ops = cache_ops; + walk_export_memory(e_reg, offset, length, flush_cache_call, &info); + + pr_debug("export region flushed: flush_offset:0x%lx, flush_len:0x%lx, cache_ops:%lu, flush_round:%d, flush_succ_round:%d, flush_retval:%pe.\n", + offset, length, cache_ops, info.total_flush_count, info.succ_flush_count, + ERR_PTR(info.ret)); + return info.ret; +} + +struct kernel_pgtable_info { + int ret; + bool set_inval; +}; + +static int kernel_pgtable_invalid_call(phys_addr_t start, phys_addr_t end, + unsigned long offset __always_unused, void *arg) +{ + struct kernel_pgtable_info *info = (struct kernel_pgtable_info *)arg; + int ret; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long end_pfn = (end + 1) >> PAGE_SHIFT; + + pr_debug("call external: set_linear_mapping_invalid(set_inval=%d)\n", info->set_inval); + ret = set_linear_mapping_invalid(start_pfn, end_pfn, info->set_inval); + if (ret < 0) { + pr_err("error calling set_linear_mapping_invalid(set_inval=%d): ret=%pe\n", + info->set_inval, ERR_PTR(ret)); + } else { + pr_debug("external called: set_linear_mapping_invalid(set_inval=%d, ret=%pe)\n", + info->set_inval, ERR_PTR(ret)); + } + + info->ret = ret; + return 0; +} + +int kernel_pgtable_set_export_invalid(struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, bool set_inval) +{ + struct kernel_pgtable_info info = { 0 }; + + if (offset % OBMM_MEMSEG_SIZE != 0 || length % OBMM_MEMSEG_SIZE != 0) { + pr_err("%s: invalid param: offset=%#lx length=%#lx", __func__, offset, length); + return -EINVAL; + } + + info.set_inval = set_inval; + walk_export_memory(e_reg, offset, length, kernel_pgtable_invalid_call, &info); + pr_debug("%s: [flush_offset=0x%lx, flush_len=0x%lx, set_inval=%d, ret=%pe]\n", + 
__func__, offset, length, set_inval, ERR_PTR(info.ret)); + return info.ret; +} + +struct map_range_info { + struct obmm_export_region *e_reg; + struct vm_area_struct *vma; + unsigned long orig_pgoff; + enum obmm_mmap_granu mmap_granu; + int ret; +}; + +static int map_range_call(phys_addr_t start, phys_addr_t end, unsigned long offset, void *arg) +{ + struct map_range_info *info = (struct map_range_info *)arg; + unsigned long length = end - start + 1; + unsigned long vm_offset; + + /* + * The offset is offset_from_head. + * + * In the case of private mapping, after remap_pfn_range is called, + * vma->vm_pgoff will be set to pfn, + * but we still need the original offset relative to the start of the region. + */ + vm_offset = offset - (info->orig_pgoff << PAGE_SHIFT); + if (info->mmap_granu == OBMM_MMAP_GRANU_PAGE) { + info->ret = remap_pfn_range(info->vma, info->vma->vm_start + vm_offset, + start >> PAGE_SHIFT, length, info->vma->vm_page_prot); + } else if (info->mmap_granu == OBMM_MMAP_GRANU_PMD) { + info->ret = remap_pfn_range_try_pmd(info->vma, info->vma->vm_start + vm_offset, + start >> PAGE_SHIFT, length, + info->vma->vm_page_prot); + } else { + pr_err("invalid mmap granu: %d\n", info->mmap_granu); + info->ret = -EINVAL; + } + if (info->ret) { + pr_err("map_export_region: failed to call remap_pfn_range on region %d: offset=%#lx, length=%#lx, ret=%pe]\n", + info->e_reg->region.regionid, offset, length, ERR_PTR(info->ret)); + return -1; + } + + return 0; +} + +int map_export_region(struct vm_area_struct *vma, struct obmm_export_region *e_reg, + enum obmm_mmap_granu mmap_granu) +{ + struct map_range_info info; + unsigned long size, offset; + + info.e_reg = e_reg; + info.vma = vma; + info.ret = 0; + info.orig_pgoff = vma->vm_pgoff; + info.mmap_granu = mmap_granu; + + size = vma->vm_end - vma->vm_start; + offset = vma->vm_pgoff << PAGE_SHIFT; + + walk_export_memory(e_reg, offset, size, map_range_call, (void *)(&info)); + + return info.ret; +} + +struct pa_info { + unsigned long pa; + unsigned long offset; + bool found; +}; + +static int search_offset_from_pa(phys_addr_t start, phys_addr_t end, unsigned long offset, + void *arg) +{ + struct pa_info *info = (struct pa_info *)arg; + + if (info->pa >= start && info->pa <= end) { + info->offset = info->pa - start + offset; + info->found = true; + /* end iterator */ + return -1; + } + + return 0; +} + +/* terminate iteration in all cases */ +static int search_pa_from_offset(phys_addr_t start, phys_addr_t end __always_unused, + unsigned long offset, void *arg) +{ + struct pa_info *info = (struct pa_info *)arg; + + if (offset != info->offset) { + pr_warn("iterator bug encountered in %s, iter.offset=%#lx, expect %#lx.\n", + __func__, offset, info->offset); + info->found = false; + return -1; + } + + info->pa = start; + info->found = true; + return -1; +} + +int get_pa_detail_export_region(const struct obmm_export_region *e_reg, unsigned long pa, + struct obmm_ext_addr *ext_addr) +{ + struct pa_info info = { 0 }; + + info.pa = pa; + walk_export_memory(e_reg, 0, e_reg->region.mem_size, search_offset_from_pa, &info); + + /* not found */ + if (!info.found) + return -EFAULT; + + /* found */ + ext_addr->region_type = OBMM_EXPORT_REGION; + ext_addr->regionid = e_reg->region.regionid; + ext_addr->offset = info.offset; + ext_addr->tid = e_reg->tokenid; + ext_addr->uba = e_reg->uba + info.offset; + ext_addr->numa_id = NUMA_NO_NODE; + ext_addr->pa = pa; + + return 0; +} + +int get_offset_detail_export_region(const struct obmm_export_region *e_reg, unsigned 
long offset, + struct obmm_ext_addr *ext_addr) +{ + struct pa_info info = { 0 }; + + if (offset >= e_reg->region.mem_size) { + pr_err("%s: invalid offset 0x%lx\n", __func__, offset); + return -EINVAL; + } + + info.offset = offset; + walk_export_memory(e_reg, offset, 1, search_pa_from_offset, &info); + if (!info.found) { + /* Offset has been checked at the beginning of this function. If the code reaches + * here, it must be an implementation error. + */ + pr_err("%s: internal bug encountered\n", __func__); + return -ENODATA; + } + + ext_addr->region_type = e_reg->region.type; + ext_addr->regionid = e_reg->region.regionid; + ext_addr->offset = offset; + ext_addr->tid = e_reg->tokenid; + ext_addr->uba = e_reg->uba + offset; + ext_addr->pa = info.pa; + /* to be decided */ + ext_addr->numa_id = NUMA_NO_NODE; + + return 0; +} diff --git a/drivers/ub/obmm/obmm_export_region_ops.h b/drivers/ub/obmm/obmm_export_region_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ef0b90326463fb1df482d914f5914b653d1e127b --- /dev/null +++ b/drivers/ub/obmm/obmm_export_region_ops.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ +#ifndef OBMM_EXPORT_REGION_H +#define OBMM_EXPORT_REGION_H + +#include "obmm_core.h" + +int flush_export_region(struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, unsigned long cache_ops); +int kernel_pgtable_set_export_invalid(struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, bool set_nc); +int map_export_region(struct vm_area_struct *vma, struct obmm_export_region *e_reg, + enum obmm_mmap_granu mmap_granu); + +int get_pa_detail_export_region(const struct obmm_export_region *e_reg, unsigned long pa, + struct obmm_ext_addr *ext_addr); + +int get_offset_detail_export_region(const struct obmm_export_region *e_reg, unsigned long offset, + struct obmm_ext_addr *ext_addr); + +#endif diff --git a/drivers/ub/obmm/obmm_import.c b/drivers/ub/obmm/obmm_import.c new file mode 100644 index 0000000000000000000000000000000000000000..8982f1f3dfab985448f544e1bb3cd454f588a3c5 --- /dev/null +++ b/drivers/ub/obmm/obmm_import.c @@ -0,0 +1,633 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ +#include +#include +#include +#include + +#include +#include + +#include "obmm_core.h" +#include "obmm_cache.h" +#include "obmm_import.h" +#include "obmm_preimport.h" +#include "obmm_resource.h" +#include "obmm_addr_check.h" +#include "obmm_shm_dev.h" + +static void set_import_region_datapath(const struct obmm_import_region *i_reg, + struct obmm_datapath *datapath) +{ + datapath->scna = i_reg->scna; + datapath->dcna = i_reg->dcna; + /* shallow copy */ + datapath->seid = i_reg->seid; + datapath->deid = i_reg->deid; +} + +static unsigned long get_pa_range_mem_cap(u32 scna, phys_addr_t pa, size_t size) +{ + phys_addr_t pa_start = pa; + phys_addr_t pa_end = pa + size - 1; + unsigned long mem_cap = 0; + + if (ub_memory_validate_pa(scna, pa_start, pa_end, true)) + mem_cap |= OBMM_MEM_ALLOW_CACHEABLE_MMAP; + if (ub_memory_validate_pa(scna, pa_start, pa_end, false)) + mem_cap |= OBMM_MEM_ALLOW_NONCACHEABLE_MMAP; + if (mem_cap == 0) + pr_err("PA range invalid. 
Non-UBMEM memory cannot be mmaped as import memory\n"); + + return mem_cap; +} + +static int setup_pa(struct obmm_import_region *i_reg) +{ + int ret; + phys_addr_t start, end; + struct obmm_datapath datapath; + + i_reg->region.mem_cap = + get_pa_range_mem_cap(i_reg->scna, i_reg->pa, i_reg->region.mem_size); + if (i_reg->region.mem_cap == 0) + return -EINVAL; + + if (!region_preimport(&i_reg->region)) { + struct ubmem_resource *ubmem_res; + + ubmem_res = setup_ubmem_resource(i_reg->pa, i_reg->region.mem_size, false); + if (IS_ERR(ubmem_res)) { + pr_err("failed to setup ubmem resource: ret=%pe\n", ubmem_res); + return PTR_ERR(ubmem_res); + } + i_reg->ubmem_res = ubmem_res; + + return 0; + } + + start = i_reg->pa; + end = i_reg->pa + i_reg->region.mem_size - 1; + set_import_region_datapath(i_reg, &datapath); + + ret = preimport_commit_prefilled(start, end, &datapath, &i_reg->numa_id, + &i_reg->preimport_handle); + if (ret) + return ret; + + i_reg->ubmem_res = preimport_get_resource_prefilled(i_reg->preimport_handle); + + return 0; +} + +/* NOTE: do not clear PA in the teardown process. Error rollback procedure may rely on it. */ +static int teardown_pa(struct obmm_import_region *i_reg) +{ + bool preimport = region_preimport(&i_reg->region); + + if (!preimport) + return release_ubmem_resource(i_reg->ubmem_res); + /* prefilled and preimport */ + return preimport_uncommit_prefilled(i_reg->preimport_handle, i_reg->pa, + i_reg->pa + i_reg->region.mem_size - 1); +} + +static int teardown_remote_numa(struct obmm_import_region *i_reg, bool force) +{ + int ret, this_ret; + + ret = lock_save_memdev_descendents(i_reg->ubmem_res); + if (ret) + return ret; + + pr_info("call external: remove_memory_remote(nid=%d, size=%#llx)\n", + i_reg->numa_id, i_reg->region.mem_size); + ret = remove_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size); + pr_debug("external called: remove_memory_remote, ret=%pe\n", ERR_PTR(ret)); + /* a full rollback is still possible: check whether this is a full teardown */ + if (ret != 0 && !force) { + pr_err("remove_memory_remote(nid=%d, size=%#llx) failed: ret=%pe.\n", + i_reg->numa_id, i_reg->region.mem_size, ERR_PTR(ret)); + goto out_recover_resource; + } + + if (region_preimport(&i_reg->region)) { + pr_info("call external: add_memory_remote(nid=%d, size=0x%llx, flags=MEMORY_KEEP_ISOLATED)\n", + i_reg->numa_id, i_reg->region.mem_size); + this_ret = add_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, + MEMORY_KEEP_ISOLATED); + pr_debug("external called: add_memory_remote() returned %d\n", this_ret); + if (this_ret == NUMA_NO_NODE) { + pr_err("failed to reset preimport memory.\n"); + ret = -ENOTRECOVERABLE; + } + } + +out_recover_resource: + restore_unlock_memdev_descendents(i_reg->ubmem_res); + return ret; +} + +static int setup_remote_numa(struct obmm_import_region *i_reg) +{ + int ret, flags; + + if (region_preimport(&i_reg->region)) + flags = 0; + else + flags = MEMORY_DIRECT_ONLINE; + + if (!(i_reg->region.mem_cap & OBMM_MEM_ALLOW_CACHEABLE_MMAP)) { + pr_err("PA range invalid. 
Cacheable memory cannot be managed with numa.remote\n"); + return -EINVAL; + } + + pr_info("call external: add_memory_remote(nid=%d, flags=%d)\n", + i_reg->numa_id, flags); + ret = add_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, flags); + pr_debug("external called: add_memory_remote() returned %d\n", ret); + if (ret < 0) { + pr_err("Remote NUMA creation failed: %d\n", ret); + return -EPERM; + } + WARN_ON(i_reg->numa_id != NUMA_NO_NODE && i_reg->numa_id != ret); + i_reg->numa_id = ret; + + if (!region_preimport(&i_reg->region)) { + ret = obmm_set_numa_distance(i_reg->scna, i_reg->numa_id, i_reg->base_dist); + if (ret < 0) { + pr_err("Failed to set remote numa distance: %pe\n", ERR_PTR(ret)); + goto out_teardown_remote_numa; + } + } + + return 0; +out_teardown_remote_numa: + WARN_ON(teardown_remote_numa(i_reg, true)); + return ret; +} + +static inline int occupy_addr_range(const struct obmm_import_region *i_reg) +{ + struct obmm_pa_range pa; + + if (!region_preimport(&i_reg->region)) { + pa.start = i_reg->pa; + pa.end = i_reg->pa + i_reg->region.mem_size - 1; + pa.info.user = OBMM_ADDR_USER_DIRECT_IMPORT; + pa.info.data = (void *)i_reg; + return occupy_pa_range(&pa); + } + + /* preimport + decoder_prefilled: address conflicts managed by its perimport range */ + return 0; +} + +static int free_addr_range(const struct obmm_import_region *i_reg) +{ + struct obmm_pa_range pa; + + if (!region_preimport(&i_reg->region)) { + pa.start = i_reg->pa; + pa.end = i_reg->pa + i_reg->region.mem_size - 1; + return free_pa_range(&pa); + } + + /* preimport + decoder_prefilled: address conflicts managed by its perimport range */ + return 0; +} + +static int setup_iomem_resource(struct obmm_import_region *i_reg) +{ + struct resource *memdev_res; + + memdev_res = setup_memdev_resource(i_reg->ubmem_res, i_reg->pa, + i_reg->region.mem_size, i_reg->region.regionid); + if (IS_ERR(memdev_res)) { + pr_err("memid=%d: failed to setup memdev resource: %pe\n", + i_reg->region.regionid, memdev_res); + return PTR_ERR(memdev_res); + } + + i_reg->memdev_res = memdev_res; + + return 0; +} + +static int teardown_iomem_resource(struct obmm_import_region *i_reg) +{ + int ret; + + ret = release_memdev_resource(i_reg->ubmem_res, i_reg->memdev_res); + if (ret) + pr_err("memid=%d: failed to release memdev resource: %pe\n", + i_reg->region.regionid, ERR_PTR(ret)); + + return ret; +} + +static int prepare_import_memory(struct obmm_import_region *i_reg) +{ + int ret, rollback_ret; + + if (!validate_scna(i_reg->scna)) + return -ENODEV; + + ret = occupy_addr_range(i_reg); + if (ret) + return ret; + + ret = setup_pa(i_reg); + if (ret) + goto out_free_addr_range; + + /* register numa node */ + if (region_numa_remote(&i_reg->region)) { + ret = setup_remote_numa(i_reg); + if (ret) + goto out_teardown_pa; + } else { + i_reg->numa_id = NUMA_NO_NODE; + } + + ret = setup_iomem_resource(i_reg); + if (ret) + goto out_teardown_numa; + + return 0; +out_teardown_numa: + if (region_numa_remote(&i_reg->region)) { + rollback_ret = teardown_remote_numa(i_reg, true); + if (rollback_ret) { + pr_err("failed to teardown remote numa on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); + ret = -ENOTRECOVERABLE; + } + } +out_teardown_pa: + rollback_ret = teardown_pa(i_reg); + if (rollback_ret) { + pr_err("failed to teardown PA level mapping on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); + ret = -ENOTRECOVERABLE; + } +out_free_addr_range: + rollback_ret = free_addr_range(i_reg); + if (rollback_ret) { + pr_err("failed to free address range 
on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); + ret = -ENOTRECOVERABLE; + } + return ret; +} + +static int release_import_memory(struct obmm_import_region *i_reg) +{ + int ret, rollback_ret, old_numa_id; + + ret = teardown_iomem_resource(i_reg); + if (ret) + return ret; + + if (region_numa_remote(&i_reg->region)) { + old_numa_id = i_reg->numa_id; + ret = teardown_remote_numa(i_reg, false); + if (ret) + goto err_teardown_numa; + } + + ret = flush_import_region(i_reg, 0, i_reg->region.mem_size, OBMM_SHM_CACHE_INVAL); + if (ret) { + pr_err("failed to flush import region, ret=%pe.\n", ERR_PTR(ret)); + goto err_flush; + } + + /* unplug memory */ + ret = teardown_pa(i_reg); + if (ret) { + pr_err("failed to release PA level mapping of region %d, ret=%pe.\n", + i_reg->region.regionid, ERR_PTR(ret)); + goto err_flush; + } + + ret = free_addr_range(i_reg); + if (ret) + goto err_free_addr_range; + + return 0; + +err_free_addr_range: + rollback_ret = setup_pa(i_reg); + if (rollback_ret) { + pr_err("failed to restore PA level mapping, ret=%pe.\n", ERR_PTR(rollback_ret)); + return -ENOTRECOVERABLE; /* rollback cannot proceed */ + } +err_flush: + if (region_numa_remote(&i_reg->region)) { + i_reg->numa_id = old_numa_id; + + rollback_ret = setup_remote_numa(i_reg); + if (rollback_ret) { + pr_err("failed to restore remote NUMA, ret=%pe.\n", ERR_PTR(rollback_ret)); + return -ENOTRECOVERABLE; /* rollback cannot proceed */ + } + } +err_teardown_numa: + rollback_ret = setup_iomem_resource(i_reg); + if (rollback_ret) { + pr_err("failed to restore iomem resource on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); + return -ENOTRECOVERABLE; + } + return ret; +} + +static bool validate_pa_range(phys_addr_t pa, size_t size) +{ + /* the PA alignment of OBMM_BASIC_GRANU might be an overkill if PAGE_SIZE is not 4K. But + * this is not be a common use case for now. 
+ */ + if (!IS_ALIGNED(pa, OBMM_BASIC_GRANU) || !IS_ALIGNED(size, OBMM_BASIC_GRANU)) { + pr_err("PA segments not aligned to OBMM basic granu: base=%#llx, size=%#zx, granularity=%#lx.\n", + pa, size, OBMM_BASIC_GRANU); + return false; + } + + if (pa == 0) { + pr_err("PA=0 unexpected.\n"); + return false; + } + if (pa + size < pa) { + pr_err("PA range overflow: base=%#llx, size=%#zx.\n", pa, size); + return false; + } + + return true; +} + +static bool validate_import_region(const struct obmm_import_region *i_reg) +{ + bool preimport; + + /* size and alignment check */ + if (i_reg->region.mem_size == 0) { + pr_err("Zero memory segment size is invalid\n"); + return false; + } + + preimport = region_preimport(&i_reg->region); + /* PA as parameter */ + if (!validate_pa_range(i_reg->pa, i_reg->region.mem_size)) + return false; + return true; +} + +static int import_to_region_flags(unsigned long *region_flags, unsigned long import_flags) +{ + *region_flags = 0; + + if (import_flags & (~OBMM_IMPORT_FLAG_MASK)) { + pr_err("Invalid import flags %#lx (unknown flags: %#lx).\n", import_flags, + import_flags & (~OBMM_IMPORT_FLAG_MASK)); + return -EINVAL; + } + if (!!(import_flags & OBMM_IMPORT_FLAG_ALLOW_MMAP) + + !!(import_flags & OBMM_IMPORT_FLAG_NUMA_REMOTE) != 1) { + pr_err("Exactly one of {ALLOW_MMAP, NUMA_REMOTE} must be specified as import flag.\n"); + return -EINVAL; + } + if ((import_flags & OBMM_IMPORT_FLAG_PREIMPORT) && + !(import_flags & OBMM_IMPORT_FLAG_NUMA_REMOTE)) { + pr_err("Preimport must be used with NUMA_REMOTE.\n"); + return -EINVAL; + } + + if (import_flags & OBMM_IMPORT_FLAG_ALLOW_MMAP) + *region_flags |= OBMM_REGION_FLAG_ALLOW_MMAP; + if (import_flags & OBMM_IMPORT_FLAG_PREIMPORT) + *region_flags |= OBMM_REGION_FLAG_PREIMPORT; + if (import_flags & OBMM_IMPORT_FLAG_NUMA_REMOTE) + *region_flags |= OBMM_REGION_FLAG_NUMA_REMOTE; + + return 0; +} + +static int init_import_region_from_cmd(const struct obmm_cmd_import *param, + struct obmm_import_region *i_reg) +{ + int ret; + bool config_numa_dist; + struct obmm_region *region = &i_reg->region; + + i_reg->region.type = OBMM_IMPORT_REGION; + i_reg->region.mem_size = param->length; + /* set flags */ + ret = import_to_region_flags(®ion->flags, param->flags); + if (ret) + return ret; + + i_reg->pa = param->addr; + + i_reg->dcna = param->dcna; + i_reg->scna = param->scna; + memcpy(i_reg->deid, param->deid, sizeof(i_reg->deid)); + memcpy(i_reg->seid, param->seid, sizeof(i_reg->seid)); + i_reg->numa_id = region_numa_remote(&i_reg->region) ? 
param->numa_id : NUMA_NO_NODE; + + ret = set_obmm_region_priv(region, param->priv_len, param->priv); + if (ret) + return ret; + + if (!validate_import_region(i_reg)) + return -EINVAL; + + config_numa_dist = region_numa_remote(&i_reg->region) && !region_preimport(&i_reg->region); + if (config_numa_dist && !is_numa_base_dist_valid(param->base_dist)) + return -EINVAL; + i_reg->base_dist = param->base_dist; + + /* NOTE: this function initializes the data structure but not the device */ + return 0; +} + +static void print_import_param(const struct obmm_cmd_import *cmd_import) +{ + pr_info("obmm_import: scna=%#x {pa=%#llx length=%#llx} flags=%#llx nid=%d base_dist=%u seid=" + EID_FMT64 " priv_len=%u\n", + cmd_import->scna, cmd_import->addr, cmd_import->length, cmd_import->flags, + cmd_import->numa_id, cmd_import->base_dist, EID_ARGS64_H(cmd_import->seid), + EID_ARGS64_L(cmd_import->seid), cmd_import->priv_len); +} + +int obmm_import(struct obmm_cmd_import *cmd_import) +{ + int retval, rollback_ret, numa_id; + struct obmm_import_region *i_reg; + uint64_t mem_id; + + print_import_param(cmd_import); + /* create obmm region */ + i_reg = kzalloc(sizeof(struct obmm_import_region), GFP_KERNEL); + if (i_reg == NULL) + return -ENOMEM; + + atomic_set(&i_reg->region.device_released, 1); + + /* arguments to region (logs produced by callee) */ + retval = init_import_region_from_cmd(cmd_import, i_reg); + if (retval) + goto out_free_ireg; + + retval = init_obmm_region(&i_reg->region); + if (retval) + goto out_free_ireg; + + retval = prepare_import_memory(i_reg); + if (retval) { + pr_err("Failed to prepare import memory: ret=%pe\n", ERR_PTR(retval)); + goto out_region_uninit; + } + + numa_id = i_reg->numa_id; + mem_id = (uint64_t)i_reg->region.regionid; + + retval = register_obmm_region(&i_reg->region); + if (retval) { + pr_err("Failed to create import device. ret=%pe\n", ERR_PTR(retval)); + goto out_release_memory; + } + activate_obmm_region(&i_reg->region); + + /* pass back output value */ + cmd_import->numa_id = numa_id; + cmd_import->mem_id = mem_id; + + pr_info("%s: mem_id=%llu online\n", __func__, cmd_import->mem_id); + return 0; + +out_release_memory: + rollback_ret = release_import_memory(i_reg); + if (rollback_ret) + pr_warn("Failed to release import memory on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); +out_region_uninit: + uninit_obmm_region(&i_reg->region); +out_free_ireg: + wait_until_dev_released(&i_reg->region); + kfree(i_reg); + return retval; +} + +/* NOTE: the operation order is not precisely the reverse order of initialization for the ease of + * error rollback. Please make careful evaluation on modifications. 
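+ * If releasing the imported memory fails, the region is re-activated so a later
+ * unimport request can be retried.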
+ */ +int obmm_unimport(const struct obmm_cmd_unimport *cmd_unimport) +{ + int ret; + struct obmm_region *reg; + struct obmm_import_region *i_reg; + + pr_info("%s: mem_id=%llu, flags=%#llx.\n", __func__, cmd_unimport->mem_id, + cmd_unimport->flags); + if (!validate_obmm_mem_id(cmd_unimport->mem_id)) + return -ENOENT; + if (cmd_unimport->flags & (~OBMM_UNIMPORT_FLAG_MASK)) { + pr_err("%s: invalid flags %#llx.\n", __func__, cmd_unimport->flags); + return -EINVAL; + } + + reg = search_deactivate_obmm_region(cmd_unimport->mem_id); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + if (reg->type != OBMM_IMPORT_REGION) { + pr_err("%s: mem_id=%llu region type mismatched.\n", __func__, cmd_unimport->mem_id); + ret = -EINVAL; + goto err_unimport; + } + i_reg = container_of(reg, struct obmm_import_region, region); + ret = release_import_memory(i_reg); + if (ret) + goto err_unimport; + + deregister_obmm_region(reg); + uninit_obmm_region(reg); + wait_until_dev_released(&i_reg->region); + kfree(i_reg); + + pr_info("%s: mem_id=%llu completed.\n", __func__, cmd_unimport->mem_id); + return 0; + +err_unimport: + activate_obmm_region(reg); + pr_err("%s: mem_id=%llu failed, %pe.\n", __func__, cmd_unimport->mem_id, ERR_PTR(ret)); + return ret; +} + +int flush_import_region(struct obmm_import_region *i_reg, unsigned long offset, + unsigned long length, unsigned long cache_ops) +{ + int ret; + + ret = flush_cache_by_pa(i_reg->pa + offset, length, cache_ops); + if (ret) + return ret; + + if (cache_ops == OBMM_SHM_CACHE_WB_INVAL || cache_ops == OBMM_SHM_CACHE_WB_ONLY) + return ub_write_queue_flush(i_reg->scna); + return 0; +} + +int map_import_region(struct vm_area_struct *vma, struct obmm_import_region *i_reg, + enum obmm_mmap_granu mmap_granu) +{ + unsigned long pfn, size; + + size = vma->vm_end - vma->vm_start; + pfn = __phys_to_pfn(i_reg->pa) + vma->vm_pgoff; + if (mmap_granu == OBMM_MMAP_GRANU_PAGE) + return remap_pfn_range(vma, vma->vm_start, pfn, size, vma->vm_page_prot); + else if (mmap_granu == OBMM_MMAP_GRANU_PMD) + return remap_pfn_range_try_pmd(vma, vma->vm_start, pfn, size, vma->vm_page_prot); + pr_err("invalid mmap granu %d\n", mmap_granu); + + return -EINVAL; +} + +int get_pa_detail_import(const struct obmm_import_region *i_reg, unsigned long pa, + struct obmm_ext_addr *ext_addr) +{ + if (pa < i_reg->pa || pa >= i_reg->pa + i_reg->region.mem_size) + return -EFAULT; + + ext_addr->region_type = OBMM_IMPORT_REGION; + ext_addr->regionid = i_reg->region.regionid; + ext_addr->offset = pa - i_reg->pa; + ext_addr->tid = 0; + ext_addr->uba = 0; + ext_addr->numa_id = i_reg->numa_id; + ext_addr->pa = pa; + + return 0; +} + +int get_offset_detail_import(const struct obmm_import_region *i_reg, unsigned long offset, + struct obmm_ext_addr *ext_addr) +{ + if (offset >= i_reg->region.mem_size) { + pr_err("%s: invalid offset 0x%lx\n", __func__, offset); + return -EINVAL; + } + + ext_addr->region_type = i_reg->region.type; + ext_addr->regionid = i_reg->region.regionid; + ext_addr->offset = offset; + ext_addr->tid = 0; + ext_addr->uba = 0; + ext_addr->pa = i_reg->pa + offset; + ext_addr->numa_id = i_reg->numa_id; + + return 0; +} diff --git a/drivers/ub/obmm/obmm_import.h b/drivers/ub/obmm/obmm_import.h new file mode 100644 index 0000000000000000000000000000000000000000..e24cb23fce33d1d535b6e0ec3c8b1462d8421876 --- /dev/null +++ b/drivers/ub/obmm/obmm_import.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+ */ +#ifndef OBMM_IMPORT_H +#define OBMM_IMPORT_H + +#include "obmm_core.h" + +int obmm_import(struct obmm_cmd_import *cmd_import); +int obmm_unimport(const struct obmm_cmd_unimport *cmd_unimport); + +int flush_import_region(struct obmm_import_region *i_reg, unsigned long offset, + unsigned long length, unsigned long cache_ops); +int map_import_region(struct vm_area_struct *vma, struct obmm_import_region *i_reg, + enum obmm_mmap_granu mmap_granu); + +int get_pa_detail_import(const struct obmm_import_region *i_reg, unsigned long pa, + struct obmm_ext_addr *ext_addr); + +int get_offset_detail_import(const struct obmm_import_region *i_reg, unsigned long offset, + struct obmm_ext_addr *ext_addr); + +#endif diff --git a/drivers/ub/obmm/obmm_lowmem.c b/drivers/ub/obmm/obmm_lowmem.c new file mode 100644 index 0000000000000000000000000000000000000000..1d8cdeaa284558b2ff89fbd4f6ba0e997494809e --- /dev/null +++ b/drivers/ub/obmm/obmm_lowmem.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ubmempool_allocator.h" +#include "obmm_lowmem.h" + +static struct notifier_block lowmem_nb; +#define LOWMEM_NOTIFY_PRIORITY 80 + +/* May be called by lowmem notifier at a very high frequency. */ +static int obmm_lowmem_notify_handler(struct notifier_block *nb __always_unused, + unsigned long dummy __always_unused, void *parm) +{ + struct reclaim_notify_data *data = parm; + bool is_huge = false; + int i; + + pr_debug_ratelimited("got lowmem message. pid=%d sync=%d reason=%u\n", current->pid, + data->sync, data->reason); + + if (data->reason != RR_DIRECT_RECLAIM && + data->reason != RR_KSWAPD && + data->reason != RR_HUGEPAGE_RECLAIM) + return -ENOMEM; + + if (data->reason == RR_HUGEPAGE_RECLAIM) + is_huge = true; + data->nr_freed = 0; + for (i = 0; i < data->nr_nid; i++) { + pr_debug_ratelimited("contract memory on nid: %d\n", data->nid[i]); + data->nr_freed += ubmempool_contract(data->nid[i], is_huge) >> PAGE_SHIFT; + } + + return 0; +} + +int lowmem_notify_init(void) +{ + lowmem_nb.notifier_call = obmm_lowmem_notify_handler; + lowmem_nb.priority = LOWMEM_NOTIFY_PRIORITY; + return register_reclaim_notifier(&lowmem_nb); +} + +void lowmem_notify_exit(void) +{ + unregister_reclaim_notifier(&lowmem_nb); +} diff --git a/drivers/ub/obmm/obmm_lowmem.h b/drivers/ub/obmm/obmm_lowmem.h new file mode 100644 index 0000000000000000000000000000000000000000..7da339c6aa051a019d0e066bd3eff094f023b445 --- /dev/null +++ b/drivers/ub/obmm/obmm_lowmem.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ +#ifndef OBMM_LOW_MEM_H +#define OBMM_LOW_MEM_H + +int lowmem_notify_init(void); +void lowmem_notify_exit(void); + +#endif diff --git a/drivers/ub/obmm/obmm_ownership.c b/drivers/ub/obmm/obmm_ownership.c new file mode 100644 index 0000000000000000000000000000000000000000..625446b240646c8a7091af3818826083f3d35148 --- /dev/null +++ b/drivers/ub/obmm/obmm_ownership.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description:OBMM Framework's implementations. 
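+ * This file tracks per-page read/write mapping counters for a region and the
+ * per-VMA access state used when the region is mmapped or its ownership changes.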
+ */ + +#include +#include + +#include "obmm_core.h" +#include "obmm_ownership.h" + +static inline uint32_t merge_counts(uint32_t read, uint32_t write) +{ + return (read << READ_SHIFT) | (write << WRITE_SHIFT); +} + +/* + * dirty -> non-dirty: INVAL_WB + * non-dirty cacheable -> NC: INVAL + * cache capability rise: NONE + * cache operation coverage: INVAL_WB > INVAL > NONE + */ +uint8_t infer_cache_ops(uint8_t cur_state, uint8_t target_state) +{ + bool cur_dirty, cur_none, target_dirty, target_none, target_clean; + uint8_t ops = OBMM_SHM_CACHE_NONE; + + cur_dirty = ((cur_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READWRITE && + (cur_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL); + target_dirty = ((target_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READWRITE && + (target_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL); + target_clean = ((target_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READONLY && + (target_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL); + cur_none = ((cur_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_NO_ACCESS || + (cur_state & OBMM_SHM_MEM_CACHE_MASK) != OBMM_SHM_MEM_NORMAL); + target_none = ((target_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_NO_ACCESS || + (target_state & OBMM_SHM_MEM_CACHE_MASK) != OBMM_SHM_MEM_NORMAL); + if (cur_dirty && target_clean) + ops = OBMM_SHM_CACHE_WB_ONLY; + else if (cur_dirty && !target_dirty) + ops = OBMM_SHM_CACHE_WB_INVAL; + else if (!cur_none && target_none) + ops = OBMM_SHM_CACHE_INVAL; + + pr_debug("%s: target_state = %u; ops = %u\n", __func__, target_state, ops); + return ops; +} + +/** + * Calculate the local page state index corresponding to the VMA address + */ +int vma_addr_to_page_idx_local(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long offset_in_vma = addr - vma->vm_start; + + return offset_in_vma >> PAGE_SHIFT; +} + +/** + * Calculate the global page state index corresponding to the VMA address + */ +static int vma_addr_to_page_idx(struct vm_area_struct *vma, + struct obmm_local_state_info *local_state_info, unsigned long addr) +{ + return local_state_info->orig_pgoff + vma_addr_to_page_idx_local(vma, addr); +} + +/* Check if new permissions conflict with existing mappings */ +static int check_target_state_allowed(uint32_t state_count, uint8_t target_mem_state) +{ + uint32_t read_count, write_count; + + read_count = GET_R_COUNTER(state_count); + write_count = GET_W_COUNTER(state_count); + + switch (target_mem_state & OBMM_SHM_MEM_ACCESS_MASK) { + case OBMM_SHM_MEM_READONLY: + fallthrough; + case OBMM_SHM_MEM_READEXEC: + if (read_count == MAX_READ_COUNT) { + pr_warn("%s: readonly map failed, read_count=%d\n", __func__, read_count); + return -EBUSY; + } + break; + case OBMM_SHM_MEM_READWRITE: + if (write_count == MAX_WRITE_COUNT) { + pr_warn("%s: readwrite map failed, write_count=%d\n", __func__, + write_count); + return -EBUSY; + } + break; + default: + break; + } + return 0; +} + +/** + * Check whether mmap operation is possible. + * The caller holds region state_mutex lock. 
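+ * Returns 0 if every page in the VMA range can accept one more mapping with the
+ * requested access mode, or -EBUSY if a per-page counter is already saturated.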
+ */ +int check_mmap_allowed(struct obmm_region *reg, struct vm_area_struct *vma, uint8_t mem_state) +{ + int idx_offset, page_idx_start, page_count, ret; + uint32_t state_count; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, vma->vm_start); + page_count = local_state_info->npages; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + ret = check_target_state_allowed(state_count, mem_state); + if (ret) + return ret; + } + return 0; +} + +/* + * Update the count of the corresponding permission in the state. + */ +static uint32_t update_state_count(uint32_t state_count, uint8_t target_mem_state, bool inc) +{ + uint32_t read_count, write_count; + int delta; + + delta = inc ? 1 : -1; + read_count = GET_R_COUNTER(state_count); + write_count = GET_W_COUNTER(state_count); + + /* inc new permission count */ + switch (target_mem_state & OBMM_SHM_MEM_ACCESS_MASK) { + case OBMM_SHM_MEM_NO_ACCESS: + break; + case OBMM_SHM_MEM_READONLY: + fallthrough; + case OBMM_SHM_MEM_READEXEC: + read_count += delta; + break; + case OBMM_SHM_MEM_READWRITE: + write_count += delta; + break; + default: + break; + } + return merge_counts(read_count, write_count); +} + +/** + * Check whether permissions can be modified. + * The caller holds region state_mutex lock. + */ +int check_modify_ownership_allowed(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info) +{ + int idx_offset, page_idx_start, page_count, local_page_idx_start, ret; + uint32_t state_count; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + uint8_t old_state; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, update_info->start); + local_page_idx_start = vma_addr_to_page_idx_local(vma, update_info->start); + page_count = (update_info->end - update_info->start) >> PAGE_SHIFT; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + old_state = + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset]; + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + + /* Check for conflicts after simulating permission changes */ + /* Remove old permissions */ + state_count = update_state_count(state_count, old_state, false); + ret = check_target_state_allowed(state_count, update_info->mem_state); + if (ret) + return ret; + } + + return 0; +} + +/** + * Increase global page permission count (for mmap). + * The caller holds region state_mutex lock. 
+ */ +void add_mapping_permission(struct obmm_region *reg, struct vm_area_struct *vma, uint8_t mem_state) +{ + int idx_offset, page_idx_start, page_count; + uint32_t state_count; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, vma->vm_start); + page_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + state_count = update_state_count(state_count, mem_state, true); + info->mem_state_arr[page_idx_start + idx_offset] = state_count; + } +} + +/** + * Update global page permission count and VMA local permissions. + * The caller holds region state_mutex lock. + */ +void update_ownership(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info) +{ + int idx_offset, page_idx_start, page_count, local_page_idx_start; + uint32_t state_count; + uint8_t old_state; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, update_info->start); + local_page_idx_start = vma_addr_to_page_idx_local(vma, update_info->start); + page_count = (update_info->end - update_info->start) >> PAGE_SHIFT; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + old_state = + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset]; + + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + /* Remove old permissions */ + state_count = update_state_count(state_count, old_state, false); + /* Add new permissions */ + state_count = update_state_count(state_count, update_info->mem_state, true); + + /* update mem_state_arr */ + info->mem_state_arr[page_idx_start + idx_offset] = state_count; + /* update vma local_state_info */ + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset] = + update_info->mem_state; + } +} + +/** + * Remove global page permission count. + * The caller holds region state_mutex lock. 
+ */ +void remove_mapping_permission(struct obmm_region *reg, struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + int idx_offset, page_idx_start, page_count, local_page_idx_start; + uint32_t state_count; + uint8_t old_state; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, start); + local_page_idx_start = vma_addr_to_page_idx_local(vma, start); + page_count = (end - start) >> PAGE_SHIFT; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + old_state = + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset]; + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + + /* Remove permissions */ + state_count = update_state_count(state_count, old_state, false); + info->mem_state_arr[page_idx_start + idx_offset] = state_count; + } +} + +int init_local_state_info(struct vm_area_struct *vma, uint8_t mem_state) +{ + struct obmm_local_state_info *local_state_info; + unsigned long size; + int ret, i; + + size = vma->vm_end - vma->vm_start; + local_state_info = kzalloc(sizeof(struct obmm_local_state_info), GFP_KERNEL); + if (local_state_info == NULL) + return -ENOMEM; + + local_state_info->npages = size >> PAGE_SHIFT; + local_state_info->local_mem_state_arr = vmalloc(sizeof(uint8_t) * local_state_info->npages); + + if (local_state_info->local_mem_state_arr == NULL) { + ret = -ENOMEM; + goto out_local_state_info; + } + for (i = 0; i < local_state_info->npages; i++) + local_state_info->local_mem_state_arr[i] = mem_state; + + local_state_info->orig_pgoff = vma->vm_pgoff; + vma->vm_private_data = local_state_info; + + pr_debug("init vma local state: npages=%d, state=%#x\n", local_state_info->npages, + mem_state); + return 0; +out_local_state_info: + kfree(local_state_info); + return ret; +} + +void release_local_state_info(struct vm_area_struct *vma) +{ + struct obmm_local_state_info *local_state_info; + + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + vma->vm_private_data = NULL; + vfree(local_state_info->local_mem_state_arr); + kfree(local_state_info); +} + +/* + * Initialize the global page permission count array. + * The obmm_ownership_info is created when the region is mmapped for the first time, + * so the caller need to hold region state_mutex lock. 
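+ * Calling this again after the array exists is a no-op that returns 0.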
+ */ +int init_ownership_info(struct obmm_region *reg) +{ + struct obmm_ownership_info *info; + int i, ret; + + if (reg->ownership_info) + return 0; + info = kzalloc(sizeof(struct obmm_ownership_info), GFP_KERNEL); + if (info == NULL) + return -ENOMEM; + + info->npages = reg->mem_size >> PAGE_SHIFT; + info->mem_state_arr = vmalloc(sizeof(uint32_t) * info->npages); + if (info->mem_state_arr == NULL) { + ret = -ENOMEM; + goto out_free_info; + } + for (i = 0; i < info->npages; i++) + info->mem_state_arr[i] = 0; + + reg->ownership_info = info; + + pr_debug("init ownership: npages=%d, state=%#x\n", info->npages, 0U); + return 0; +out_free_info: + kfree(info); + return ret; +} + +void release_ownership_info(struct obmm_region *reg) +{ + struct obmm_ownership_info *info = reg->ownership_info; + + reg->ownership_info = NULL; + vfree(info->mem_state_arr); + kfree(info); +} diff --git a/drivers/ub/obmm/obmm_ownership.h b/drivers/ub/obmm/obmm_ownership.h new file mode 100644 index 0000000000000000000000000000000000000000..a4c49abb81170db6f18645b7d6fada73a42f9911 --- /dev/null +++ b/drivers/ub/obmm/obmm_ownership.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ + +#ifndef OBMM_OWNERSHIP_H +#define OBMM_OWNERSHIP_H + +#include "obmm_core.h" + +#define WRITE_COUNT_BIT 16 +#define READ_COUNT_BIT 16 + +#define WRITE_MASK ((1 << WRITE_COUNT_BIT) - 1) /* 16-bit mask */ +#define READ_MASK ((1 << READ_COUNT_BIT) - 1) /* 16-bit mask */ + +#define MAX_WRITE_COUNT WRITE_MASK +#define MAX_READ_COUNT READ_MASK + +#define WRITE_SHIFT 0 +#define READ_SHIFT (WRITE_COUNT_BIT) + +#define GET_W_COUNTER(val) (((val) >> WRITE_SHIFT) & WRITE_MASK) +#define GET_R_COUNTER(val) (((val) >> READ_SHIFT) & READ_MASK) + +/* + * [ 16-31 : 0-15 ] + * state:[ Read : Write ] + * [ 65535 : 65535 ] + */ +struct obmm_ownership_info { + uint32_t *mem_state_arr; + int npages; +}; + +struct obmm_local_state_info { + uint8_t *local_mem_state_arr; + /* Original file offset in vma */ + unsigned long orig_pgoff; + int npages; +}; +int vma_addr_to_page_idx_local(struct vm_area_struct *vma, unsigned long addr); +uint8_t infer_cache_ops(uint8_t cur_state, uint8_t target_state); +int init_ownership_info(struct obmm_region *reg); +int init_local_state_info(struct vm_area_struct *vma, uint8_t mem_state); +void release_ownership_info(struct obmm_region *reg); +void release_local_state_info(struct vm_area_struct *vma); +void add_mapping_permission(struct obmm_region *reg, struct vm_area_struct *vma, uint8_t mem_state); +void update_ownership(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info); +int check_modify_ownership_allowed(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info); +int check_mmap_allowed(struct obmm_region *reg, struct vm_area_struct *vma, uint8_t mem_state); +void remove_mapping_permission(struct obmm_region *reg, struct vm_area_struct *vma, + unsigned long start, unsigned long end); +#endif diff --git a/drivers/ub/obmm/obmm_preimport.c b/drivers/ub/obmm/obmm_preimport.c new file mode 100644 index 0000000000000000000000000000000000000000..aac211232a881c9235284d5fa14352fd64abdbbb --- /dev/null +++ b/drivers/ub/obmm/obmm_preimport.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
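+ * Description: common pre-import handling: remote NUMA node creation, NUMA distance
+ * configuration and the /proc/obmm/preimport_info listing.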
+ */ + +#include +#include +#include +#include +#include +#include + +#include "obmm_preimport.h" +#include "obmm_import.h" + +static char not_ready_dummy; +void *not_ready_ptr = ¬_ready_dummy; + +static DEFINE_MUTEX(list_mutex); +static LIST_HEAD(pr_list); + +bool is_numa_base_dist_valid(uint8_t base_dist) +{ + if (base_dist > MAX_NUMA_DIST) { + pr_err("invalid numa base distance %d: out of valid range.\n", base_dist); + return false; + } + if (base_dist != 0 && base_dist <= LOCAL_DISTANCE) { + pr_err("invalid numa base distance %d: reserved values used.\n", base_dist); + return false; + } + return true; +} + +int obmm_set_numa_distance(unsigned int cna, int nid_remote, uint8_t base_dist) +{ + int nid_local, nid, min_dist, i = 0; + int node_distances[OBMM_MAX_LOCAL_NUMA_NODES]; + int nids[OBMM_MAX_LOCAL_NUMA_NODES]; + + if (!is_numa_base_dist_valid(base_dist)) + return -EINVAL; + + nid_local = ub_mem_get_numa_id(cna); + pr_debug("for cna = %#x, get local node = %d\n", cna, nid_local); + if (nid_local < 0) { + pr_err("failed to set numa distance: bus controller with CNA=%u has nid=%d.", cna, + nid_local); + return -ENODEV; + } + + if (base_dist == 0) + return 0; + + min_dist = __node_distance(nid_local, nid_local); + + for_each_online_local_node(nid) { + nids[i] = nid; + node_distances[i++] = + min(MAX_NUMA_DIST, base_dist + __node_distance(nid_local, nid) - min_dist); + } + + return numa_remote_set_distance(nid_remote, nids, node_distances, i); +} + +int check_preimport_cmd_common(const struct obmm_cmd_preimport *cmd) +{ + /* OBMM_BASIC_GRANU is always smaller than or equal to memory_block_size_bytes(). No need + * to check for OBMM_BASIC_GRANU here. + */ + if (cmd->length % memory_block_size_bytes() != 0) { + pr_err("preimport length not aligned to %#lx.\n", + memory_block_size_bytes()); + return -EINVAL; + } + if (cmd->pa % memory_block_size_bytes()) { + pr_err("preimport base PA not aligned to %#lx.\n", + memory_block_size_bytes()); + return -EINVAL; + } + if (cmd->length > ULLONG_MAX - cmd->pa) { + pr_err("preimport PA range overflowed.\n"); + return -EINVAL; + } + if (cmd->length == 0) { + pr_err("invalid preimport length 0.\n"); + return -EINVAL; + } + if (cmd->flags & ~OBMM_PREIMPORT_FLAG_MASK) { + pr_err("undefined preimport flags specified in %#llx.\n", cmd->flags); + return -EINVAL; + } + /* scna is mandatory parameter, always required to initialize NUMA distance */ + if (!validate_scna(cmd->scna)) + return -ENODEV; + if (!is_numa_base_dist_valid(cmd->base_dist)) + return -EINVAL; + return 0; +} + +int preimport_prepare_common(struct preimport_range *pr, uint8_t base_dist) +{ + int ret, ret_err; + + if (!ub_memory_validate_pa(pr->scna, pr->start, pr->end, true)) { + pr_err("PA range invalid. 
Cacheable memory cannot be managed with preimport\n"); + return -EINVAL; + } + + pr_info("call external: add_memory_remote(nid=%d, flags=MEMORY_KEEP_ISOLATED)\n", + pr->numa_id); + ret = add_memory_remote(pr->numa_id, pr->start, pr->end - pr->start + 1, + MEMORY_KEEP_ISOLATED); + pr_debug("external called: add_memory_remote() returned %d\n", ret); + if (ret < 0) { + pr_err("failed to call add_memory_remote(nid=%d): %pe\n", + pr->numa_id, ERR_PTR(ret)); + return -EPERM; + } + WARN_ON(pr->numa_id != NUMA_NO_NODE && pr->numa_id != ret); + pr->numa_id = ret; + + ret = obmm_set_numa_distance(pr->scna, pr->numa_id, base_dist); + if (ret < 0) { + pr_err("Failed to set numa distance for remote numa: %pe\n", ERR_PTR(ret)); + goto err_remove_memory_remote; + } + + mutex_lock(&list_mutex); + list_add(&pr->node, &pr_list); + mutex_unlock(&list_mutex); + + return 0; + +err_remove_memory_remote: + pr_info("call external: remove_memory_remote(nid=%d)\n", pr->numa_id); + ret_err = remove_memory_remote(pr->numa_id, pr->start, pr->end - pr->start + 1); + pr_debug("external called: remove_memory_remote() returned %d\n", ret_err); + return ret; +} + +int preimport_release_common(struct preimport_range *pr, bool force) +{ + int ret; + + pr_info("call external: remove_memory_remote(nid=%d)\n", pr->numa_id); + ret = remove_memory_remote(pr->numa_id, pr->start, pr->end - pr->start + 1); + pr_debug("external called: remove_memory_remote() returned %pe\n", ERR_PTR(ret)); + if (ret && !force) { + pr_err("failed to call remove_memory_remote(nid=%d, size=%#llx): ret=%pe.\n", + pr->numa_id, pr->end - pr->start + 1, ERR_PTR(ret)); + return ret; + } + + mutex_lock(&list_mutex); + list_del(&pr->node); + mutex_unlock(&list_mutex); + return ret; +} + +int check_preimport_datapath_common(const struct preimport_range *pr, + const struct obmm_datapath *datapath) +{ + if (pr->scna != datapath->scna || pr->dcna != datapath->dcna) { + pr_err("scna-dcna pair mismatch: <%#x, %#x> used in import; <%#x, %#x> in preimport.\n", + datapath->scna, datapath->dcna, pr->scna, pr->dcna); + return -EINVAL; + } + if (memcmp(pr->seid, datapath->seid, EID_BYTES)) { + pr_err("seid mismatch: " EID_FMT64 " used in import; " EID_FMT64 " in preimport.\n", + EID_ARGS64_H(datapath->seid), EID_ARGS64_L(datapath->seid), + EID_ARGS64_H(pr->seid), EID_ARGS64_L(pr->seid)); + return -EINVAL; + } + if (memcmp(pr->deid, datapath->deid, EID_BYTES)) { + pr_err("deid mismatch: " EID_FMT64 " used in import; " EID_FMT64 " in preimport.\n", + EID_ARGS64_H(datapath->deid), EID_ARGS64_L(datapath->deid), + EID_ARGS64_H(pr->deid), EID_ARGS64_L(pr->deid)); + return -EINVAL; + } + + return 0; +} + +static void print_preimport_param(const struct obmm_cmd_preimport *cmd) +{ + pr_info("obmm_preimport: pa=%#llx length=%#llx scna=%#x dcna=%#x flags=%#llx nid=%d base_dist=%u deid=" + EID_FMT64 " seid=" EID_FMT64 " priv_len=%u\n", + cmd->pa, cmd->length, cmd->scna, cmd->dcna, cmd->flags, cmd->numa_id, + cmd->base_dist, EID_ARGS64_H(cmd->deid), EID_ARGS64_L(cmd->deid), + EID_ARGS64_H(cmd->seid), EID_ARGS64_L(cmd->seid), cmd->priv_len); +} + +int obmm_preimport(struct obmm_cmd_preimport *cmd) +{ + int ret; + + print_preimport_param(cmd); + if (!try_module_get(THIS_MODULE)) { + pr_err("Module is dying. 
Reject all preimport requests\n"); + return -EPERM; + } + + ret = preimport_prepare_prefilled(cmd); + + if (ret) + module_put(THIS_MODULE); + else + pr_info("%s: preimport on nid=%d finished.\n", __func__, cmd->numa_id); + return ret; +} + +static int check_unpreimport_cmd_common(const struct obmm_cmd_preimport *cmd) +{ + if (cmd->flags & ~OBMM_UNPREIMPORT_FLAG_MASK) { + pr_err("undefined unpreimport flags specified in %#llx.\n", cmd->flags); + return -EINVAL; + } + return 0; +} + +static void print_unpreimport_param(const struct obmm_cmd_preimport *cmd) +{ + pr_info("obmm_unpreimport: pa=%#llx, length=%#llx.\n", cmd->pa, cmd->length); +} + +int obmm_unpreimport(struct obmm_cmd_preimport *cmd) +{ + int ret; + + print_unpreimport_param(cmd); + ret = check_unpreimport_cmd_common(cmd); + if (ret) + return ret; + + ret = preimport_release_prefilled(cmd->pa, cmd->pa + cmd->length - 1); + if (ret == 0) + module_put(THIS_MODULE); + pr_info("%s: unpreimport on pa=%#llx finished.\n", __func__, cmd->pa); + + return ret; +} + +static void *preimp_info_seq_start(struct seq_file *m __always_unused, loff_t *pos) +{ + mutex_lock(&list_mutex); + /* Shift the position by 1 to make place for table header. */ + if (*pos == 0) + return SEQ_START_TOKEN; + return seq_list_start(&pr_list, *pos - 1); +} + +static void *preimp_info_seq_next(struct seq_file *m __always_unused, void *v, loff_t *pos) +{ + /* SEQ_START_TOKEN is a reserved which matches with the dummy header of list. The next + * element of the dummy header is the first real element. + */ + if (v == SEQ_START_TOKEN) + v = &pr_list; + return seq_list_next(v, &pr_list, pos); +} + +static void preimp_info_seq_stop(struct seq_file *m __always_unused, void *v __always_unused) +{ + mutex_unlock(&list_mutex); +} + +#define PA_WIDTH 16 +#define CNA_WIDTH 8 +#define HALF_EID_WIDTH 18 +#define FULL_EID_WIDTH (2 * HALF_EID_WIDTH + 1) +#define NID_WIDTH 3 +static int preimp_info_seq_show(struct seq_file *m, void *v) +{ + const struct preimport_range *pr = list_entry(v, struct preimport_range, node); + + if (v == SEQ_START_TOKEN) + seq_printf(m, "%-*s - %-*s : %-*s %-*s %-*s %-*s %-*s\n", PA_WIDTH, + "pa_start", PA_WIDTH, "pa_end", CNA_WIDTH, "dcna", CNA_WIDTH, "scna", + FULL_EID_WIDTH, "deid", FULL_EID_WIDTH, "seid", NID_WIDTH, "nid"); + else + seq_printf(m, + "%-*llx - %-*llx : %#-*x %#-*x " + EID_ALIGNED_FMT64 " " EID_ALIGNED_FMT64 " %-*d\n", + PA_WIDTH, pr->start, PA_WIDTH, pr->end, CNA_WIDTH, pr->dcna, CNA_WIDTH, + pr->scna, HALF_EID_WIDTH, EID_ARGS64_H(pr->deid), HALF_EID_WIDTH, + EID_ARGS64_L(pr->deid), HALF_EID_WIDTH, EID_ARGS64_H(pr->seid), + HALF_EID_WIDTH, EID_ARGS64_L(pr->seid), NID_WIDTH, pr->numa_id); + return 0; +} + +static const struct seq_operations preimp_info_sops = { + .start = preimp_info_seq_start, + .stop = preimp_info_seq_stop, + .next = preimp_info_seq_next, + .show = preimp_info_seq_show, +}; + +static int init_preimport_info_seqfile(void) +{ + struct proc_dir_entry *p; + + p = proc_mkdir("obmm", NULL); + if (!p) { + pr_err("failed to init obmm proc dir.\n"); + return -ENOMEM; + } + p = proc_create_seq("obmm/preimport_info", 0, NULL, &preimp_info_sops); + if (!p) { + pr_err("failed to init obmm proc file.\n"); + + remove_proc_subtree("obmm", NULL); + return -ENOMEM; + } + return 0; +} + +int module_preimport_init(void) +{ + int ret; + + ret = init_preimport_info_seqfile(); + if (ret) + return ret; + + preimport_init_prefilled(); + + return 0; +} + +void module_preimport_exit(void) +{ + preimport_exit_prefilled(); + + 
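+	/* tear down the /proc/obmm entries created by module_preimport_init() */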
WARN_ON(remove_proc_subtree("obmm", NULL)); +} diff --git a/drivers/ub/obmm/obmm_preimport.h b/drivers/ub/obmm/obmm_preimport.h new file mode 100644 index 0000000000000000000000000000000000000000..7f7c2d3a86a5e90b9b38242e87170612967b2c6a --- /dev/null +++ b/drivers/ub/obmm/obmm_preimport.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ +#ifndef OBMM_PREIMPORT_H +#define OBMM_PREIMPORT_H + +#include +#include +#include "obmm_core.h" + +struct ub_mem_info; +struct resource; +struct ubmem_resource; + +struct preimport_range { + int numa_id; + + phys_addr_t start; + phys_addr_t end; + + unsigned int scna; + unsigned int dcna; + u8 seid[16]; + u8 deid[16]; + unsigned int use_count; + + struct list_head node; +}; + +extern void *not_ready_ptr; + +int check_preimport_cmd_common(const struct obmm_cmd_preimport *cmd_preimport); +int preimport_prepare_common(struct preimport_range *preimport_range, uint8_t base_dist); +int preimport_release_common(struct preimport_range *preimport_range, bool force); +int check_preimport_datapath_common(const struct preimport_range *preimport_range, + const struct obmm_datapath *datapath); + +int preimport_prepare_prefilled(struct obmm_cmd_preimport *cmd_preimport); +int preimport_release_prefilled(phys_addr_t start, phys_addr_t end); +void preimport_init_prefilled(void); +void preimport_exit_prefilled(void); + +/* belows are exposed to other components of OBMM */ +bool is_numa_base_dist_valid(uint8_t base_dist); +int obmm_set_numa_distance(unsigned int cna, int nid_remote, uint8_t base_dist); + +int obmm_preimport(struct obmm_cmd_preimport *cmd_preimport); +int obmm_unpreimport(struct obmm_cmd_preimport *cmd_preimport); +int module_preimport_init(void); +void module_preimport_exit(void); + +int preimport_commit_prefilled(phys_addr_t start, phys_addr_t end, + const struct obmm_datapath *datapath, int *p_numa_id, + void **p_handle); +int preimport_uncommit_prefilled(void *handle, phys_addr_t start, phys_addr_t end); +struct ubmem_resource *preimport_get_resource_prefilled(void *handle); + +#endif diff --git a/drivers/ub/obmm/obmm_preimport_prefilled.c b/drivers/ub/obmm/obmm_preimport_prefilled.c new file mode 100644 index 0000000000000000000000000000000000000000..50a4273d0924deb71ba3786623eb5f4c3d11bb2a --- /dev/null +++ b/drivers/ub/obmm/obmm_preimport_prefilled.c @@ -0,0 +1,343 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
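+ * Description: preimport handling for prefilled ranges: PA-range registration,
+ * remote NUMA setup and per-memory-block usage tracking.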
+ */ + +#include +#include +#include + +#include "obmm_preimport.h" +#include "obmm_addr_check.h" +#include "obmm_resource.h" + +struct prefilled_preimport_range { + struct preimport_range pr; + spinlock_t bitmap_lock; + unsigned long nbits; + unsigned long *bitmap; + struct ubmem_resource *ubmem_res; +}; +static DEFINE_MUTEX(preimport_mutex); + +static int create_prefilled_preimport_range(const struct obmm_cmd_preimport *cmd, + struct prefilled_preimport_range **p_ppr) +{ + struct prefilled_preimport_range *ppr; + + ppr = kzalloc(sizeof(struct prefilled_preimport_range), GFP_KERNEL); + if (ppr == NULL) + return -ENOMEM; + + ppr->pr.numa_id = cmd->numa_id; + ppr->pr.start = cmd->pa; + ppr->pr.end = cmd->pa + cmd->length - 1; + ppr->pr.scna = cmd->scna; + ppr->pr.dcna = cmd->dcna; + memcpy(ppr->pr.deid, cmd->deid, sizeof(cmd->deid)); + memcpy(ppr->pr.seid, cmd->seid, sizeof(cmd->seid)); + ppr->pr.use_count = 0; + + spin_lock_init(&ppr->bitmap_lock); + ppr->nbits = cmd->length / memory_block_size_bytes(); + ppr->bitmap = bitmap_zalloc(ppr->nbits, GFP_KERNEL); + if (!ppr->bitmap) { + pr_err("failed to allocate preimport range bitmap.\n"); + kfree(ppr); + return -ENOMEM; + } + + ppr->ubmem_res = setup_ubmem_resource(cmd->pa, cmd->length, true); + if (IS_ERR(ppr->ubmem_res)) { + pr_err("failed to setup ubmem resource on preimport: ret=%pe\n", ppr->ubmem_res); + kfree(ppr->bitmap); + kfree(ppr); + return PTR_ERR(ppr->ubmem_res); + } + + *p_ppr = ppr; + return 0; +} + +static void destroy_prefilled_preimport_range(const struct prefilled_preimport_range *ppr) +{ + release_ubmem_resource(ppr->ubmem_res); + kfree(ppr->bitmap); + kfree(ppr); +} + +static int get_pa_mapping(phys_addr_t addr, struct prefilled_preimport_range **p_ppr) +{ + int ret; + struct obmm_addr_info info; + + ret = query_pa_range(addr, &info); + if (ret) { + pr_err("No information found with PA requested.\n"); + return ret; + } + if (info.user != OBMM_ADDR_USER_PREIMPORT) { + pr_err("PA requested is not a preimport address.\n"); + return -EINVAL; + } + if (info.data == not_ready_ptr) { + pr_err("Preimport process not finished. Try later.\n"); + return -EAGAIN; + } + *p_ppr = (struct prefilled_preimport_range *)info.data; + + return 0; +} + +static int check_preimport_cmd(const struct obmm_cmd_preimport *cmd) +{ + int ret; + + ret = check_preimport_cmd_common(cmd); + if (ret) + return ret; + + if (cmd->pa == 0) { + pr_err("invalid preimport PA base addr 0.\n"); + return -EINVAL; + } + return 0; +} + +int preimport_prepare_prefilled(struct obmm_cmd_preimport *cmd) +{ + int ret; + struct prefilled_preimport_range *ppr; + struct obmm_pa_range pa_range; + + ret = check_preimport_cmd(cmd); + if (ret) + return ret; + + pa_range.start = cmd->pa; + pa_range.end = cmd->pa + cmd->length - 1; + pa_range.info.user = OBMM_ADDR_USER_PREIMPORT; + pa_range.info.data = not_ready_ptr; + ret = occupy_pa_range(&pa_range); + if (ret) + return ret; + + ret = create_prefilled_preimport_range(cmd, &ppr); + if (ret) + goto err_free_pa_range; + + ret = preimport_prepare_common(&ppr->pr, cmd->base_dist); + if (ret) + goto err_destroy_ppr; + cmd->numa_id = ppr->pr.numa_id; + + /* make ppr accessible to others, no more access! (ppr might be freed by racers.) 
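+	 * If the update below fails, ppr has not been published and the error
+	 * path still owns it exclusively.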
*/ + pa_range.info.data = (void *)ppr; + ret = update_pa_range(pa_range.start, &pa_range.info); + if (ret) { + cmd->numa_id = NUMA_NO_NODE; + goto err_unprepare_common; + } + + return 0; + +err_unprepare_common: + WARN_ON(preimport_release_common(&ppr->pr, true)); +err_destroy_ppr: + destroy_prefilled_preimport_range(ppr); +err_free_pa_range: + WARN_ON(free_pa_range(&pa_range)); + return ret; +} + +int preimport_release_prefilled(phys_addr_t start, phys_addr_t end) +{ + int ret; + struct obmm_pa_range pa_range; + struct prefilled_preimport_range *ppr; + + mutex_lock(&preimport_mutex); + ret = get_pa_mapping(start, &ppr); + if (ret) { + pr_err("failed to identify preimport range during unpreimport.\n"); + goto err_unlock; + } + /* must be an exact match */ + if (ppr->pr.start != start || ppr->pr.end != end) { + pr_err("requested range touches ppr but is not an exact match.\n"); + ret = -EINVAL; + goto err_unlock; + } + if (ppr->pr.use_count != 0) { + pr_err("preimport cannot be released: %u active users found.\n", ppr->pr.use_count); + ret = -EBUSY; + goto err_unlock; + } + ret = preimport_release_common(&ppr->pr, false); + if (ret) + goto err_unlock; + /* roll back is not possible from this point */ + + pa_range.start = ppr->pr.start; + pa_range.end = ppr->pr.end; + pa_range.info.user = OBMM_ADDR_USER_PREIMPORT; + pa_range.info.data = (void *)ppr; + WARN_ON(free_pa_range(&pa_range)); + + mutex_unlock(&preimport_mutex); + + destroy_prefilled_preimport_range(ppr); + return ret; + +err_unlock: + mutex_unlock(&preimport_mutex); + return ret; +} + +static int get_ppr(phys_addr_t pa, struct prefilled_preimport_range **p_ppr) +{ + int ret; + struct prefilled_preimport_range *ppr; + + mutex_lock(&preimport_mutex); + ret = get_pa_mapping(pa, &ppr); + if (ret) + goto out_unlock; + if (ppr == not_ready_ptr) { + pr_err("preimport requested not ready yet.\n"); + ret = -EAGAIN; + goto out_unlock; + } + ppr->pr.use_count += 1; + *p_ppr = ppr; +out_unlock: + mutex_unlock(&preimport_mutex); + return ret; +} + +static void put_ppr(struct prefilled_preimport_range *ppr) +{ + mutex_lock(&preimport_mutex); + WARN_ON(ppr->pr.use_count == 0); + ppr->pr.use_count -= 1; + mutex_unlock(&preimport_mutex); +} + +static int occupy_ppr_blocks(struct prefilled_preimport_range *ppr, phys_addr_t start, + phys_addr_t end) +{ + int ret = 0; + unsigned long bit, init_bit, end_bit, flags; + + spin_lock_irqsave(&ppr->bitmap_lock, flags); + if (start < ppr->pr.start || end > ppr->pr.end) { + pr_err("requested range is not managed by preimport.\n"); + ret = -EINVAL; + goto out_unlock; + } + init_bit = (start - ppr->pr.start) / memory_block_size_bytes(); + end_bit = (end - ppr->pr.start) / memory_block_size_bytes(); + + for (bit = init_bit; bit <= end_bit; bit++) { + if (test_bit(bit, ppr->bitmap)) { + ret = -EEXIST; + pr_err("requested range conflicts on preimport block %lu.\n", bit); + goto out_unlock; + } + } + + for (bit = init_bit; bit <= end_bit; bit++) + set_bit(bit, ppr->bitmap); + +out_unlock: + spin_unlock_irqrestore(&ppr->bitmap_lock, flags); + return ret; +} + +static int free_ppr_blocks(struct prefilled_preimport_range *ppr, phys_addr_t start, + phys_addr_t end) +{ + int ret = 0; + unsigned long bit, init_bit, end_bit, flags; + + spin_lock_irqsave(&ppr->bitmap_lock, flags); + if (start < ppr->pr.start || end > ppr->pr.end) { + pr_err("requested range is not managed by preimport.\n"); + ret = -EINVAL; + goto out_unlock; + } + init_bit = (start - ppr->pr.start) / memory_block_size_bytes(); + end_bit = (end - 
ppr->pr.start) / memory_block_size_bytes(); + + for (bit = init_bit; bit <= end_bit; bit++) { + if (!test_bit(bit, ppr->bitmap)) { + ret = -EINVAL; + pr_err("preimport block %lu never used.\n", bit); + goto out_unlock; + } + } + + for (bit = init_bit; bit <= end_bit; bit++) + clear_bit(bit, ppr->bitmap); + +out_unlock: + spin_unlock_irqrestore(&ppr->bitmap_lock, flags); + return ret; +} + +/* alignment checked by callers */ +int preimport_commit_prefilled(phys_addr_t start, phys_addr_t end, + const struct obmm_datapath *datapath, int *p_numa_id, + void **p_handle) +{ + int ret; + struct prefilled_preimport_range *ppr; + + ret = get_ppr(start, &ppr); + if (ret) + return ret; + + /* TODO: move to out */ + ret = check_preimport_datapath_common(&ppr->pr, datapath); + if (ret) + goto err_put_ppr; + + ret = occupy_ppr_blocks(ppr, start, end); + if (ret) + goto err_put_ppr; + + *p_numa_id = ppr->pr.numa_id; + *p_handle = (void *)ppr; + return 0; + +err_put_ppr: + put_ppr(ppr); + return ret; +} + +int preimport_uncommit_prefilled(void *handle, phys_addr_t start, phys_addr_t end) +{ + int ret; + struct prefilled_preimport_range *ppr; + + ppr = (struct prefilled_preimport_range *)handle; + ret = free_ppr_blocks(handle, start, end); + if (ret) + return ret; + + put_ppr(ppr); + return ret; +} + +struct ubmem_resource *preimport_get_resource_prefilled(void *handle) +{ + return ((struct prefilled_preimport_range *)handle)->ubmem_res; +} + +void preimport_init_prefilled(void) +{ +} + +void preimport_exit_prefilled(void) +{ +} diff --git a/drivers/ub/obmm/obmm_resource.c b/drivers/ub/obmm/obmm_resource.c new file mode 100644 index 0000000000000000000000000000000000000000..6c110c664e01c9b21527c025afaab0014ad2f4bd --- /dev/null +++ b/drivers/ub/obmm/obmm_resource.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright(c) Huawei Technologies Co., Ltd. 2025 All rights reserved. + * Description: OBMM Framework's implementations. + * + * OBMM utilizes the iomem resource tree infrastructure to expose the physical address range of each + * OBMM memory device to other kernel components. External accessors should never modify the + * resource tree structure (with or without resource lock) and should take the resource lock while + * traversing the resource tree edges. "walk_iomem_res_desc" declared in serves as + * a valid accessing candidate. + * + * Resource Tree Structure: + * + * OBMM introduces two layers in the iomem resource tree: + * + * 1. The UBMEM resource: The UBMEM resource models a range of UB memory physical address range. + * The range of memory maps or may map remote memory. It is always a direct child of the iomem + * resource root node. + * + * 2. The OBMM memory device: The OBMM memory device resource models a range of UB memory physical + * address range which is associated with an OBMM memory device. It is always a leaf of the + * iomem resource tree. + * + * If the imported memory is manged with remote NUMA, there might an extra interior layers between + * the two metioned above. In our context we refer to it as NUMA resource. + * + * Below is an example: + * + * (iomem_resource) + * PREIMPORT_UBMEM + * System RAM (Remote) + * MEMID_1 + * MEMID_2 + * DIRECT_IMPORT_UBMEM + * System RAM (Remote) + * MEMID_3 + * DIRECT_IMPORT_UBMEM + * MEMID_4 + * + * Things become complicated when we are handling the removal of a memory device which shares the + * preimport UBMEM resource with memory devices which outlives itself. 
Current NUMA remote + * implementation would remove the "System RAM (Remote)" resource first and re-insert the resource + * afterwards. The living memory devices would not be preserved. Therefore it is necessary to save + * all the memory device descendents before shutting down the part of the preimport memory. + * + * Concurrency Notes: + * + * As metioned in the beginning, for external accessors, everything under ubmem_resource in the + * iomem_resource tree might be read with kernel resource_lock but should never be modified (even + * with the lock). The only exception would be memory hotplug / NUMA remote setup process which is + * triggered by OBMM. With this presumption it is safe for OBMM itself to traverse the resource tree + * without kernel resource lock. On contrast, all modifications to the subtree takes the kernel + * resource lock to avoid racing with external readers. Lastly, there is a mutex per UBMEM resource + * which synchronizes internal accesses to the subtree. + */ + +#define pr_fmt(fmt) "OBMM: resource:" fmt + +#include +#include + +#include "obmm_resource.h" + +#define MEMID_IORES_PREFIX "MEMID_" + +struct ubmem_resource { + struct resource res; + bool preimport; + + /* serialize the children save-restore process (only necessary for preimport range) */ + struct mutex mutex; + struct resource *memdev_res_shelter; +}; + +struct ubmem_resource *setup_ubmem_resource(phys_addr_t pa, resource_size_t size, bool preimport) +{ + int ret; + struct ubmem_resource *ubmem_res; + + ubmem_res = kzalloc(sizeof(struct ubmem_resource), GFP_KERNEL); + if (!ubmem_res) + return ERR_PTR(-ENOMEM); + + ubmem_res->res.start = pa; + ubmem_res->res.end = pa + size - 1; + ubmem_res->res.name = preimport ? "PREIMPORT_UBMEM" : "DIRECT_IMPORT_UBMEM"; + ubmem_res->res.flags = IORESOURCE_MEM; + + ubmem_res->preimport = preimport; + mutex_init(&ubmem_res->mutex); + + ret = insert_resource(&iomem_resource, &ubmem_res->res); + if (ret) { + kfree(ubmem_res); + return ERR_PTR(ret); + } + return ubmem_res; +} + +int release_ubmem_resource(struct ubmem_resource *ubmem_res) +{ + int ret; + + ret = remove_resource(&ubmem_res->res); + if (ret) + return ret; + mutex_destroy(&ubmem_res->mutex); + kfree(ubmem_res); + return 0; +} + +/* + * Move memdev_res saved in the sheltered list back under the refreshed NUMA resource. This function + * should be called only when the NUMA resource is present. 
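+ * Caller must hold ubmem_res->mutex (hence the _locked suffix).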
+ */ +static void restore_sheltered_memdev_locked(struct ubmem_resource *ubmem_res) +{ + struct resource *numa_res, *memdev_res; + + numa_res = ubmem_res->res.child; + + memdev_res = ubmem_res->memdev_res_shelter; + while (memdev_res) { + ubmem_res->memdev_res_shelter = memdev_res->sibling; + + memdev_res->sibling = NULL; + WARN_ON(request_resource(numa_res, memdev_res)); + + memdev_res = ubmem_res->memdev_res_shelter; + } +} + +/* + * Take memory device resource under the NUMA resource to be reset and chain them in the sheltered + * list + */ +int lock_save_memdev_descendents(struct ubmem_resource *ubmem_res) +{ + int ret; + struct resource *numa_res, *memdev_res, *next, **shelter_tail; + + if (!ubmem_res->preimport) + return 0; + + mutex_lock(&ubmem_res->mutex); + + numa_res = ubmem_res->res.child; + if (!numa_res) + return 0; + WARN_ON(numa_res->sibling != NULL); + + memdev_res = numa_res->child; + shelter_tail = &ubmem_res->memdev_res_shelter; + while (memdev_res) { + next = memdev_res->sibling; + + ret = release_resource(memdev_res); + if (ret) { + pr_err("failed to remove memdev resource %s: unexpected racing happened.\n", + memdev_res->name ? memdev_res->name : "(null)"); + goto out_restore; + } + memdev_res->child = memdev_res->parent = memdev_res->sibling = NULL; + *shelter_tail = memdev_res; + + shelter_tail = &memdev_res->sibling; + memdev_res = next; + } + return 0; + +out_restore: + restore_sheltered_memdev_locked(ubmem_res); + mutex_unlock(&ubmem_res->mutex); + return ret; +} + +void restore_unlock_memdev_descendents(struct ubmem_resource *ubmem_res) +{ + if (!ubmem_res->preimport) + return; + + restore_sheltered_memdev_locked(ubmem_res); + mutex_unlock(&ubmem_res->mutex); +} + +struct resource *setup_memdev_resource(struct ubmem_resource *ubmem_res, phys_addr_t pa, + resource_size_t size, int mem_id) +{ + int ret; + struct resource *memdev_res, *parent; + + memdev_res = kzalloc(sizeof(struct resource), GFP_KERNEL); + if (!memdev_res) + return ERR_PTR(-ENOMEM); + + memdev_res->start = pa; + memdev_res->end = pa + size - 1; + memdev_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + memdev_res->name = kasprintf(GFP_KERNEL, MEMID_IORES_PREFIX "%d", mem_id); + if (!memdev_res->name) { + ret = -ENOMEM; + goto err_free_res; + } + + /* Be a descendent of the UBMEM resource */ + parent = &ubmem_res->res; + mutex_lock(&ubmem_res->mutex); + + /* if NUMA resource is present, make itself a child of the NUMA resource */ + if (parent->child) + parent = parent->child; + + ret = request_resource(parent, memdev_res); + if (ret) { + pr_err("failed to request resource under parent %s, ret=%pe.\n", parent->name, + ERR_PTR(ret)); + goto err_unlock; + } + + mutex_unlock(&ubmem_res->mutex); + return memdev_res; + +err_unlock: + mutex_unlock(&ubmem_res->mutex); + kfree(memdev_res->name); +err_free_res: + kfree(memdev_res); + return ERR_PTR(ret); +} + +int release_memdev_resource(struct ubmem_resource *ubmem_res, struct resource *memdev_res) +{ + int ret; + + mutex_lock(&ubmem_res->mutex); + ret = release_resource(memdev_res); + mutex_unlock(&ubmem_res->mutex); + + if (ret) + return ret; + kfree(memdev_res->name); + kfree(memdev_res); + return 0; +} diff --git a/drivers/ub/obmm/obmm_resource.h b/drivers/ub/obmm/obmm_resource.h new file mode 100644 index 0000000000000000000000000000000000000000..067ac944e785d41002d097a7f47d544f8bb375b4 --- /dev/null +++ b/drivers/ub/obmm/obmm_resource.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright(c) Huawei Technologies Co., Ltd. 
2025 All rights reserved. + * Description: OBMM Framework's implementations. + */ +#ifndef OBMM_RESOURCE_H +#define OBMM_RESOURCE_H + +#include + +struct ubmem_resource; + +struct ubmem_resource *setup_ubmem_resource(phys_addr_t pa, resource_size_t size, bool preimport); +int release_ubmem_resource(struct ubmem_resource *ubmem_res); +int lock_save_memdev_descendents(struct ubmem_resource *ubmem_res); +void restore_unlock_memdev_descendents(struct ubmem_resource *ubmem_res); + +struct resource *setup_memdev_resource(struct ubmem_resource *ubmem_res, phys_addr_t pa, + resource_size_t size, int mem_id); +int release_memdev_resource(struct ubmem_resource *ubmem_res, struct resource *memdev_res); + +#endif diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c new file mode 100644 index 0000000000000000000000000000000000000000..0814c37d12b46ca600fb95f5a96868548003d44d --- /dev/null +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -0,0 +1,993 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#include +#include +#include +#include + +#include "obmm_cache.h" +#include "obmm_sysfs.h" +#include "obmm_export_region_ops.h" +#include "obmm_import.h" +#include "obmm_ownership.h" +#include "obmm_shm_dev.h" + +static dev_t obmm_devt; + +static const char *obmm_shm_region_name = "OBMM_SHMDEV"; +static const char *obmm_shm_rootdev_name = "obmm"; +static struct device *obmm_shm_rootdev; + +static int scan_and_flush(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info); + +/** + * Convert VM flags to mem state + */ +static unsigned long get_vma_mem_state(const vm_flags_t vm_flags, bool cacheable) +{ + unsigned long mem_state; + + if (vm_flags & VM_WRITE) + mem_state = OBMM_SHM_MEM_READWRITE; + else if ((vm_flags & VM_READ) && (vm_flags & VM_EXEC)) + mem_state = OBMM_SHM_MEM_READEXEC; + else if (vm_flags & VM_READ) + mem_state = OBMM_SHM_MEM_READONLY; + else + mem_state = OBMM_SHM_MEM_NO_ACCESS; + + if (cacheable && mem_state != OBMM_SHM_MEM_NO_ACCESS) + mem_state |= OBMM_SHM_MEM_NORMAL; + else + mem_state |= OBMM_SHM_MEM_NORMAL_NC; + pr_debug("VMA init mem_state: vma_flags=0x%lx, cacheable=%d, mem_state=0x%lx\n", + vm_flags, cacheable, mem_state); + return mem_state; +} + +/* VMA operations for obmm-mmaped VMA */ +static void obmm_vma_open(struct vm_area_struct *vma) +{ + pr_debug("VMA opened range (0x%lx-0x%lx)\n", vma->vm_start, vma->vm_end); +} + +static void obmm_vma_close(struct vm_area_struct *vma) +{ + struct obmm_region *reg; + int ret; + + reg = (struct obmm_region *)vma->vm_file->private_data; + + mutex_lock(®->state_mutex); + /* cc-mmap */ + if (reg->mmap_mode == OBMM_MMAP_NORMAL && reg->ownership_info) { + /* flush cache */ + struct obmm_cmd_update_range update_info = { + .start = vma->vm_start, + .end = vma->vm_end, + .mem_state = OBMM_SHM_MEM_NO_ACCESS, + .cache_ops = OBMM_SHM_CACHE_INFER, + }; + ret = scan_and_flush(reg, vma, &update_info); + if (ret) + pr_err("vma close: failed to flush cache\n"); + + remove_mapping_permission(reg, vma, vma->vm_start, vma->vm_end); + release_local_state_info(vma); + } + + reg->mmap_count--; + if (reg->mmap_count == 0) { + /* reset mmap_mode */ + reg->mmap_mode = OBMM_MMAP_INIT; + } + mutex_unlock(®->state_mutex); + pr_debug("obmm_shmdev munmap: mem_id=%d pid=%d vma=[%#lx, %#lx]\n", reg->regionid, + current->pid, vma->vm_start, vma->vm_end); +} + +static int 
obmm_vma_may_split(struct vm_area_struct *vma __always_unused, + unsigned long addr __always_unused) +{ + /* not supported */ + pr_err("VMA may split at 0x%lx (range: 0x%lx-0x%lx), but split not supported\n", addr, + vma->vm_start, vma->vm_end); + return -EOPNOTSUPP; +} + +static int obmm_vma_mremap(struct vm_area_struct *vma __always_unused) +{ + pr_warn("mremap not supported\n"); + return -EOPNOTSUPP; +} + +static bool validate_update_info(const struct obmm_region *region, + const struct obmm_cmd_update_range *update_info, + bool cacheable) +{ + bool valid; + + if (!cacheable) { + pr_err("Ownership operation is not applicable to o-sync mmap %d.\n", + region->regionid); + return false; + } + if (!region->ownership_info) { + pr_err("error updating ownership: ownership of memdev %d not initialized.\n", + region->regionid); + return false; + } + + valid = update_info->start < update_info->end && + IS_ALIGNED(update_info->start, PAGE_SIZE) && + IS_ALIGNED(update_info->end, PAGE_SIZE); + if (!valid) + pr_err("{pid=%d, start=%#llx end=%#llx is not a valid page range from memdev %d.\n", + current->pid, update_info->start, update_info->end, region->regionid); + return valid; +} +static int obmm_vma_mprotect(struct vm_area_struct *vma __always_unused, + unsigned long start __always_unused, unsigned long end __always_unused, + unsigned long newflags __always_unused) +{ + pr_warn("mprotect not supported\n"); + return -EOPNOTSUPP; +} +static vm_fault_t obmm_vma_fault(struct vm_fault *vmf __always_unused) +{ + pr_warn("Unexpected fault\n"); + return VM_FAULT_SIGBUS; +} +static int obmm_vma_access(struct vm_area_struct *vma __always_unused, + unsigned long addr __always_unused, void *buf __always_unused, + int len __always_unused, int write __always_unused) +{ + pr_warn("access not supported\n"); + return -EOPNOTSUPP; +} +static const char *obmm_vma_name(struct vm_area_struct *vma __always_unused) +{ + return "OBMM_SHM"; +} + +static unsigned long obmm_pagesize(struct vm_area_struct *vma) +{ + struct file *filp = vma->vm_file; + struct obmm_region *reg = (struct obmm_region *)filp->private_data; + + if (reg->mmap_granu == OBMM_MMAP_GRANU_PMD) + return PMD_SIZE; + else + return PAGE_SIZE; +} + +static const struct vm_operations_struct obmm_vm_ops = { + .open = obmm_vma_open, + .close = obmm_vma_close, + .may_split = obmm_vma_may_split, + .mremap = obmm_vma_mremap, + .mprotect = obmm_vma_mprotect, + .fault = obmm_vma_fault, + .access = obmm_vma_access, + .name = obmm_vma_name, + .pagesize = obmm_pagesize, +}; + +static int obmm_shm_fops_open(struct inode *inode, struct file *file) +{ + struct obmm_region *reg; + bool cacheable; + + reg = container_of(inode->i_cdev, struct obmm_region, cdevice); + file->private_data = reg; + + pr_debug("obmm_shmdev open: mem_id=%d pid=%d f_mode=%#x f_flags=%#x\n", reg->regionid, + current->pid, file->f_mode, file->f_flags); + + cacheable = !(file->f_flags & O_SYNC); + if (cacheable && !(reg->mem_cap & OBMM_MEM_ALLOW_CACHEABLE_MMAP)) { + pr_err("Noncacheable region %d cannot be mmaped with cachable mode.\n", + reg->regionid); + return -EPERM; + } + if (!cacheable && !(reg->mem_cap & OBMM_MEM_ALLOW_NONCACHEABLE_MMAP)) { + pr_err("Cacheable region %d cannot be mmaped with noncachable mode.\n", + reg->regionid); + return -EPERM; + } + if (try_get_obmm_region(reg) == NULL) { + pr_err("obmm_shmdev open: The device is in creation or destruction process. 
Open failed.\n"); + return -EAGAIN; + } + + pr_debug("obmm_shmdev open: mem_id=%d pid=%d completed.\n", reg->regionid, current->pid); + + return 0; +} + +static int obmm_shm_fops_flush(struct file *file __always_unused, fl_owner_t owner __always_unused) +{ + return 0; +} + +static int obmm_shm_fops_release(struct inode *inode __always_unused, struct file *file) +{ + struct obmm_region *reg = (struct obmm_region *)file->private_data; + + pr_debug("obmm_shmdev release: mem_id=%d pid=%d\n", reg->regionid, current->pid); + put_obmm_region(reg); + + return 0; +} + +static int map_obmm_region(struct vm_area_struct *vma, struct obmm_region *reg, + enum obmm_mmap_granu mmap_granu) +{ + struct obmm_export_region *e_reg; + struct obmm_import_region *i_reg; + + pr_debug("mmap region %d: size=%#llx\n", reg->regionid, reg->mem_size); + if (reg->type == OBMM_IMPORT_REGION) { + i_reg = container_of(reg, struct obmm_import_region, region); + return map_import_region(vma, i_reg, mmap_granu); + } + + e_reg = container_of(reg, struct obmm_export_region, region); + return map_export_region(vma, e_reg, mmap_granu); +} + +/* Return page table protection bits. + * @mem_state must be validated by caller. + */ +static pgprot_t mem_state_to_pgprot(unsigned long mem_state) +{ + pgprot_t pgprot; + + /* initialize pgprot to be normal memory pgprot with certain access rights */ + if ((mem_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READONLY) + pgprot = PAGE_READONLY; + else if ((mem_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READEXEC) + pgprot = PAGE_READONLY_EXEC; + else if ((mem_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READWRITE) + pgprot.pgprot = _PAGE_READONLY & ~PTE_RDONLY; + else + pgprot = PAGE_NONE; + + /* modify cacheability attribute if necessary */ + if ((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL_NC) + pgprot = pgprot_writecombine(pgprot); + else if ((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_DEVICE) + pgprot = pgprot_noncached(pgprot); + + return pgprot; +} + +static void print_mmap_param(const struct file *file, const struct vm_area_struct *vma) +{ + const struct obmm_region *reg = (struct obmm_region *)file->private_data; + const char *vm_flags_desc, *f_flags_desc; + + pr_debug("obmm_shmdev mmap: mem_id=%d pid=%d vma=[%#lx, %#lx] pgoff=%#lx ", reg->regionid, + current->pid, vma->vm_start, vma->vm_end, vma->vm_pgoff); + + if (vma->vm_flags & VM_WRITE) + vm_flags_desc = "W"; + else if ((vma->vm_flags & VM_READ) && (vma->vm_flags & VM_EXEC)) + vm_flags_desc = "RX"; + else if (vma->vm_flags & VM_READ) + vm_flags_desc = "R"; + else + vm_flags_desc = "N"; + + if (file->f_flags & O_SYNC) + f_flags_desc = "O_SYNC"; + else + f_flags_desc = "not O_SYNC"; + + pr_debug("vm_flags=%#lx(%s) f_flags=%#x(%s)\n", vma->vm_flags, vm_flags_desc, file->f_flags, + f_flags_desc); +} + +static bool validate_perm(struct file *file, vm_flags_t vm_flags) +{ + if (((vm_flags & VM_READ) && !(file->f_mode & FMODE_READ)) || + ((vm_flags & VM_WRITE) && !(file->f_mode & FMODE_WRITE)) || + ((vm_flags & VM_EXEC) && !(file->f_mode & FMODE_READ))) { + pr_err("%s false: vm_flags: %#lx, f_mode: %#x\n", __func__, vm_flags, file->f_mode); + return false; + } + return true; +} + +static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct obmm_region *reg = (struct obmm_region *)file->private_data; + unsigned long size, offset; + uint8_t mem_state; + enum obmm_mmap_mode old_mmap_mode; + enum obmm_mmap_granu mmap_granu, init_mmap_granu; + int ret; + bool cacheable, 
o_sync; + + print_mmap_param(file, vma); + if (!region_allow_mmap(reg)) { + pr_err("mmap region %d: not allow to be mmaped\n", reg->regionid); + return -EPERM; + } + + if (!validate_perm(file, vma->vm_flags)) { + pr_err("mmap region %d: invalid vma permission\n", reg->regionid); + return -EPERM; + } + + o_sync = file->f_flags & O_SYNC; + size = vma->vm_end - vma->vm_start; + offset = vma->vm_pgoff << PAGE_SHIFT; + + if (offset & OBMM_MMAP_FLAG_HUGETLB_PMD) { + pr_debug("trying hugepage mmap\n"); + offset &= ~OBMM_MMAP_FLAG_HUGETLB_PMD; + if (vma->vm_start % PMD_SIZE || vma->vm_end % PMD_SIZE) { + pr_err("error running huge mmap for not pmd-aligned vma: %#lx-%#lx\n", + vma->vm_start, vma->vm_end); + return -EINVAL; + } + mmap_granu = OBMM_MMAP_GRANU_PMD; + } else { + mmap_granu = OBMM_MMAP_GRANU_PAGE; + } + init_mmap_granu = reg->mmap_granu; + if (reg->mmap_granu == OBMM_MMAP_GRANU_NONE) { + reg->mmap_granu = mmap_granu; + } else if (reg->mmap_granu != mmap_granu) { + pr_err("map with PAGE_SIZE and PMD_SIZE granu should not be mixed on the same region\n"); + ret = -EPERM; + goto err_reset_mmap_granu; + } + + vma->vm_pgoff = offset >> PAGE_SHIFT; + + if (offset >= reg->mem_size || size > reg->mem_size - offset) { + pr_err("mmap region %d: offset:%#lx, size:%#lx over region size: %#llx", + reg->regionid, offset, size, reg->mem_size); + ret = -EINVAL; + goto err_reset_mmap_granu; + } + + /* + * VM flags considerations + * Compared to legacy device memory, OBMM memory has many different properties: + * 1. does not have side-effects on access (VM_IO not set) + * 2. may be used for core dump output (VM_DONTDUMP not set) + * On the other hand, OBMM and traditional device memory do have some similarities: + * 3. the mapping cannot be inherited on process fork (VM_DONTCOPY set) for now + * 4. VMA merging and expanding makes no sense (VM_DONTEXPAND set) + * 5. the VMA should not be swapped out (VM_LOCKED set) + * 6. mappable import region does not has struct page; mappable export region haves struct + * page, but cannot work as expected since its kernel linear mapping might be modified + * (VM_PFNMAP set) + */ + vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_LOCKED | VM_PFNMAP); + cacheable = o_sync ? false : true; + mem_state = get_vma_mem_state(vma->vm_flags, cacheable); + + /* initial VMA page prot used by the mapping process -- will be changed later */ + vma->vm_page_prot = mem_state_to_pgprot(mem_state); + + mutex_lock(®->state_mutex); + old_mmap_mode = reg->mmap_mode; + + if ((o_sync && reg->mmap_mode == OBMM_MMAP_NORMAL) || + (!o_sync && reg->mmap_mode == OBMM_MMAP_OSYNC)) { + pr_err("region cannot be mapped to cc and nc at the same time"); + ret = -EPERM; + goto err_mutex_unlock; + } + if (reg->mmap_mode == OBMM_MMAP_INIT) + reg->mmap_mode = o_sync ? OBMM_MMAP_OSYNC : OBMM_MMAP_NORMAL; + + /* cc mmap */ + if (reg->mmap_mode == OBMM_MMAP_NORMAL) { + if (mmap_granu == OBMM_MMAP_GRANU_PAGE) { + ret = init_local_state_info(vma, mem_state); + if (ret) { + pr_err("init local state info failed: %pe\n", ERR_PTR(ret)); + goto reset_cur_osync; + } + /* + * initialize region-level ownership info if not done yet. + * once initialized, the OBMM ownership will persist until + * the memdev goes offline + */ + ret = init_ownership_info(reg); + if (ret) + goto err_release_local_state_info; + /* + * after ownership_info initialized, mmap_granu should not be + * reset to OBMM_MMAP_GRANU_NONE. 
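+			 * Refresh init_mmap_granu so the error paths below restore
+			 * the now-persistent granularity rather than GRANU_NONE.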
+ */ + init_mmap_granu = reg->mmap_granu; + ret = check_mmap_allowed(reg, vma, mem_state); + if (ret) + goto err_release_local_state_info; + } + + ret = map_obmm_region(vma, reg, mmap_granu); + if (ret) { + pr_err("Failed to mmap region %d. ret=%pe\n", reg->regionid, ERR_PTR(ret)); + goto err_release_local_state_info; + } + if (mmap_granu == OBMM_MMAP_GRANU_PAGE) + add_mapping_permission(reg, vma, mem_state); + } else { + /* cc-region with nc-mmap(o-sync) */ + ret = map_obmm_region(vma, reg, mmap_granu); + if (ret) { + pr_err("Failed to mmap region %d. ret=%pe\n", reg->regionid, ERR_PTR(ret)); + goto reset_cur_osync; + } + } + reg->mmap_count++; + mutex_unlock(®->state_mutex); + /* + * since OBMM allows changing protection by pages and we will not split + * VMA in near future. Therefore a mismatch between PTE protection and + * VMA flags is inevitable. Our current approach is to avoid all + * possible faults to change the PTE protection on the fly. Here we + * just set the page protection to the most restrictive one to guard + * against unexpected access. + */ + vma->vm_page_prot = vm_get_page_prot(VM_NONE); + if (!cacheable) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vm_flags_clear(vma, VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE); + + vma->vm_ops = &obmm_vm_ops; + + pr_debug("obmm_shmdev mmap: mem_id=%d pid=%d vma=[%#lx, %#lx] mapped: mem_state=%#x.\n", + reg->regionid, current->pid, vma->vm_start, vma->vm_end, mem_state); + + return 0; + +err_release_local_state_info: + if (mmap_granu == OBMM_MMAP_GRANU_PAGE) + release_local_state_info(vma); +reset_cur_osync: + if (old_mmap_mode == OBMM_MMAP_INIT) + reg->mmap_mode = OBMM_MMAP_INIT; +err_mutex_unlock: + mutex_unlock(®->state_mutex); +err_reset_mmap_granu: + reg->mmap_granu = init_mmap_granu; + return ret; +} + +/* + * Verify whether mem_state is valid. 
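+ * A valid value combines one access field with one cacheability field, e.g.
+ * (OBMM_SHM_MEM_NORMAL | OBMM_SHM_MEM_READWRITE) for a cacheable read-write
+ * page. The reserved cacheability encoding, executable non-cacheable/device
+ * memory and accessible OBMM_SHM_MEM_NORMAL_NC memory are all rejected below.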
+ */ +static bool validate_state(uint8_t mem_state) +{ + if (mem_state & ~(OBMM_SHM_MEM_CACHE_MASK | OBMM_SHM_MEM_ACCESS_MASK)) { + pr_err("Invalid mem_state: %#x", mem_state); + return false; + } + + /* validate cacheability field */ + if ((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_CACHE_RESV) { + pr_err("Invalid mem_state: %#x -- reserved cacheability", mem_state); + return false; + } + /* currently no need to validate access permission field */ + + if (((mem_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READEXEC) && + (((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_DEVICE) || + (mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL_NC)) { + pr_err("Bad target mem_state configuration: NC memory cannot be executable\n"); + return false; + } + + if (((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL_NC) && + ((mem_state & OBMM_SHM_MEM_ACCESS_MASK) != OBMM_SHM_MEM_NO_ACCESS)) { + pr_err("Invalid access state transition: cannot set cacheable region to an accessible but non-cacheable state.\n"); + return false; + } + + return true; +} + +static bool validate_cache_ops(uint8_t cache_ops) +{ + if (cache_ops != OBMM_SHM_CACHE_NONE && + cache_ops != OBMM_SHM_CACHE_INVAL && + cache_ops != OBMM_SHM_CACHE_WB_INVAL) { + pr_err("Invalid cache operations: 0x%x\n", cache_ops); + return false; + } + return true; +} + +static int update_pte_prot(pte_t *ptep, unsigned long addr __always_unused, void *data) +{ + pgprot_t *pgprot = (pgprot_t *)data; + pte_t ptent_old, ptent_new; + + ptent_old = ptep_get(ptep); + + ptent_new = pfn_pte(pte_pfn(ptent_old), *pgprot); + if (pte_special(ptent_old)) + ptent_new = pte_mkspecial(ptent_new); + + set_pte(ptep, ptent_new); + return 0; +} + +static void log_ownership_change(struct obmm_region *reg, uint64_t start, uint64_t end, + uint8_t mem_state, uint8_t cache_ops) +{ + pr_debug("obmm memory %d ownership change: pid=%d start=%#llx end=%#llx mem_state=%u cache_ops=%u\n", + reg->regionid, current->pid, start, end, mem_state, cache_ops); +} + +/* the caller holds mm mmap lock */ +static long update_region_page_range(const struct obmm_cmd_update_range *update_info) +{ + int ret; + pgprot_t pgprot; + + /* decide new page protection properties */ + pgprot = mem_state_to_pgprot(update_info->mem_state); + + /* + * we currently do not update VMA properties. Instead we manipulate the + * page table entries directly: VMA-level manipulation is not + * preferrable because the users want to have page-level control. + * Sub-VMA manipulations, which involves frequent merge and split, + * require efforts. But we just do not have enough time. 
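+	 * Consequently only the PTEs rewritten below (plus the ownership
+	 * bookkeeping) reflect the per-page permissions; the VMA flags stay as
+	 * set at mmap time.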
+ */ + + pr_debug("changing pgtable pgprot to 0x%llx: pid=%d start=0x%llx end=0x%llx\n", + pgprot_val(pgprot), current->pid, update_info->start, update_info->end); + /* not sure whether this part MUST be protected by the write lock */ + ret = apply_to_page_range(current->mm, update_info->start, + update_info->end - update_info->start, update_pte_prot, &pgprot); + if (ret) { + pr_err("failed to change pgprot to 0x%llx: pid=%d start=0x%llx end=0x%llx\n", + pgprot_val(pgprot), current->pid, update_info->start, update_info->end); + return ret; + } + pr_debug("user pgtable updated\n"); + obmm_flush_tlb(current->mm); + pr_debug("TLB flushed\n"); + + return 0; +} + +static bool validate_vma_attrs(struct vm_area_struct *vma, struct file *file, + const struct obmm_cmd_update_range *update_info) +{ + if (!vma) { + pr_err("vma not found for update range: start=%#llx end=%#llx.\n", + update_info->start, update_info->end); + return false; + } + if (vma->vm_file == NULL || file == NULL || + vma->vm_file->private_data != file->private_data) { + pr_err("VA range [%#llx, %#llx) is not a mapping of the target memdev.\n", + update_info->start, update_info->end); + return false; + } + if (update_info->start < vma->vm_start || update_info->end > vma->vm_end) { + pr_err("invalid update range: request [%#llx, %#llx), full range [%#lx, %#lx)\n", + update_info->start, update_info->end, vma->vm_start, vma->vm_end); + return false; + } + return true; +} + +struct scan_context { + struct obmm_region *reg; + struct obmm_local_state_info *local_state_info; + unsigned long vma_start; + uint8_t target_mem_state; + uint8_t range_mem_state; + unsigned long local_page_idx; + unsigned long page_count; +}; + +static int do_scan_region_and_flush(struct scan_context *ctx, unsigned long region_page_idx_start, + unsigned long idx_offset_start, unsigned long idx_offset, + bool is_read) +{ + uint8_t cache_ops; + unsigned long phys_offset, size; + + cache_ops = is_read ? OBMM_SHM_CACHE_INVAL : OBMM_SHM_CACHE_WB_INVAL; + phys_offset = (region_page_idx_start + idx_offset_start) << PAGE_SHIFT; + size = (idx_offset - idx_offset_start) << PAGE_SHIFT; + return obmm_region_flush_range(ctx->reg, phys_offset, size, cache_ops); +} + +/* + * Scan the global permission count and flush the cache + * for intervals where the read permission count is 1 + * and write permission count is 0. 
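+ * For the write-back variant (is_read == false) the flushed intervals are
+ * instead those where the write permission count is exactly 1.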
+ */ +static int scan_region_and_flush(struct scan_context *ctx, bool is_read) +{ + unsigned long idx_offset, region_page_idx_start, idx_offset_start; + struct obmm_ownership_info *info; + int ret; + uint32_t state_count, read_count, write_count; + bool start_flag, stop_flag; + + info = ctx->reg->ownership_info; + /* translate to region page idx */ + region_page_idx_start = ctx->local_page_idx + ctx->local_state_info->orig_pgoff; + + idx_offset_start = -1; + for (idx_offset = 0; idx_offset < ctx->page_count; idx_offset++) { + state_count = info->mem_state_arr[region_page_idx_start + idx_offset]; + read_count = GET_R_COUNTER(state_count); + write_count = GET_W_COUNTER(state_count); + + if (is_read) { + start_flag = (write_count == 0 && read_count == 1); + stop_flag = (write_count != 0 || read_count != 1); + } else { + start_flag = (write_count == 1); + stop_flag = (write_count != 1); + } + + if (start_flag && idx_offset_start == -1) { + idx_offset_start = idx_offset; + } else if (stop_flag && idx_offset_start != -1) { + /* flush the range [idx_offset_start, idx_offset) */ + ret = do_scan_region_and_flush(ctx, region_page_idx_start, idx_offset_start, + idx_offset, is_read); + if (ret) + return ret; + idx_offset_start = -1; + } + } + /* check if there is a range not flushed */ + if (idx_offset_start != -1) { + ret = do_scan_region_and_flush(ctx, region_page_idx_start, idx_offset_start, + idx_offset, is_read); + if (ret) + return ret; + } + return 0; +} + +static int do_scan_and_flush(struct scan_context *ctx) +{ + int ret; + uint8_t cache_ops; + unsigned long size, vm_start; + + cache_ops = infer_cache_ops(ctx->range_mem_state, ctx->target_mem_state); + vm_start = ctx->vma_start + (ctx->local_page_idx << PAGE_SHIFT); + size = (unsigned long)ctx->page_count << PAGE_SHIFT; + + log_ownership_change(ctx->reg, vm_start, vm_start + size, ctx->target_mem_state, cache_ops); + if (cache_ops == OBMM_SHM_CACHE_NONE) { + /* ignore none ops */ + ret = 0; + } else if (cache_ops == OBMM_SHM_CACHE_WB_INVAL || cache_ops == OBMM_SHM_CACHE_WB_ONLY) { + /* may need to split and flush */ + ret = scan_region_and_flush(ctx, false); + } else { + /* may need to split and flush */ + ret = scan_region_and_flush(ctx, true); + } + return ret; +} + +/* + * Scan pages in a range and flush pages which are not in use. + * The caller holds region state_mutex lock. 
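+ * Consecutive pages sharing the same local mem_state are coalesced so that
+ * each homogeneous run is handled by a single do_scan_and_flush() call.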
+ */ +static int scan_and_flush(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info) +{ + struct obmm_local_state_info *local_state_info; + int idx_offset, page_count, local_page_idx_start, idx_offset_start; + uint8_t mem_state_start, mem_state; + struct scan_context ctx; + int ret; + + page_count = (update_info->end - update_info->start) >> PAGE_SHIFT; + + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + local_page_idx_start = vma_addr_to_page_idx_local(vma, update_info->start); + + ctx.reg = reg; + ctx.local_state_info = local_state_info; + ctx.vma_start = vma->vm_start; + ctx.target_mem_state = update_info->mem_state; + + idx_offset_start = 0; + mem_state_start = local_state_info->local_mem_state_arr[local_page_idx_start]; + for (idx_offset = 1; idx_offset < page_count; idx_offset++) { + mem_state = + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset]; + if (mem_state == mem_state_start) + continue; + + ctx.range_mem_state = mem_state_start; + ctx.local_page_idx = local_page_idx_start + idx_offset_start; + ctx.page_count = idx_offset - idx_offset_start; + + ret = do_scan_and_flush(&ctx); + if (ret) + return ret; + + idx_offset_start = idx_offset; + mem_state_start = mem_state; + } + + ctx.range_mem_state = mem_state_start; + ctx.local_page_idx = local_page_idx_start + idx_offset_start; + ctx.page_count = idx_offset - idx_offset_start; + ret = do_scan_and_flush(&ctx); + return ret; +} + +static void print_update_param(const struct obmm_cmd_update_range *update_info) +{ + pr_debug("obmm_set_ownership: pid=%d va=[%#llx, %#llx) mem_state=%#x cache_ops=%#x\n", + current->pid, update_info->start, update_info->end, update_info->mem_state, + update_info->cache_ops); +} + +static bool validate_ownership_perm(struct file *file, + const struct obmm_cmd_update_range *update_info) +{ + uint8_t access_param = update_info->mem_state & OBMM_SHM_MEM_ACCESS_MASK; + vm_flags_t tmp_vmflags = VM_NONE; + + if (access_param == OBMM_SHM_MEM_READONLY) + tmp_vmflags |= VM_READ; + if (access_param == OBMM_SHM_MEM_READWRITE) + tmp_vmflags |= (VM_READ | VM_WRITE); + if (access_param == OBMM_SHM_MEM_READEXEC) + tmp_vmflags |= (VM_READ | VM_EXEC); + return validate_perm(file, tmp_vmflags); +} + +static long obmm_shm_update_range(struct file *file, + const struct obmm_cmd_update_range *update_info) +{ + int ret; + unsigned long phys_offset; + struct obmm_region *reg = (struct obmm_region *)file->private_data; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct obmm_local_state_info *local_state_info; + uint8_t cache_ops; + bool cacheable; + + print_update_param(update_info); + + if (file->f_flags & O_SYNC) + cacheable = false; + else + cacheable = true; + /* quick validation without VMA info. 
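+	 * A typical caller looks like the sketch below (va/len/fd are
+	 * placeholders, not part of this driver):
+	 *   struct obmm_cmd_update_range req = {
+	 *       .start = va, .end = va + len,
+	 *       .mem_state = OBMM_SHM_MEM_NORMAL | OBMM_SHM_MEM_READWRITE,
+	 *       .cache_ops = OBMM_SHM_CACHE_INFER,
+	 *   };
+	 *   ioctl(fd, OBMM_SHMDEV_UPDATE_RANGE, &req);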
*/ + if (!validate_update_info(reg, update_info, cacheable)) + return -EINVAL; + + if (!validate_ownership_perm(file, update_info)) { + pr_err("The target permission is not allowed for the vma.\n"); + return -EPERM; + } + + if (!validate_state(update_info->mem_state)) + return -EINVAL; + + if (update_info->cache_ops != OBMM_SHM_CACHE_INFER) { + /* validate cache operations */ + if (!validate_cache_ops(update_info->cache_ops)) + return -EINVAL; + } + + mmap_read_lock(mm); + + vma = find_vma(mm, update_info->start); + if (!validate_vma_attrs(vma, file, update_info)) { + ret = -EFAULT; + goto err_unlock; + } + + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + mutex_lock(®->state_mutex); + + ret = check_modify_ownership_allowed(reg, vma, update_info); + if (ret) { + pr_err("check range (%llx-%llx) ownership failed: %d\n", update_info->start, + update_info->end, ret); + goto err_mutex; + } + + ret = update_region_page_range(update_info); + if (ret) + goto err_mutex; + /* + * If the user specifies a cache operation, we perform the operation + * on the range specified by update_info. Otherwise, + * we dynamically calculate whether the cache operation is needed. + */ + if (update_info->cache_ops != OBMM_SHM_CACHE_INFER) { + cache_ops = update_info->cache_ops; + log_ownership_change(reg, update_info->start, update_info->end, + update_info->mem_state, cache_ops); + /* conditionally flush L3 cache & ub controller packet queue */ + phys_offset = update_info->start - vma->vm_start + + (local_state_info->orig_pgoff << PAGE_SHIFT); + ret = obmm_region_flush_range(reg, phys_offset, + update_info->end - update_info->start, cache_ops); + } else { + ret = scan_and_flush(reg, vma, update_info); + } + + if (ret) { + /* original ownership has been lost. */ + pr_err("ownership update: failed to flush cache, ret=%pe. not recoverable.\n", + ERR_PTR(ret)); + ret = -ENOTRECOVERABLE; + goto err_mutex; + } + update_ownership(reg, vma, update_info); + + mutex_unlock(®->state_mutex); + mmap_read_unlock(mm); + + pr_debug("obmm_set_ownership: completed.\n"); + return 0; + +err_mutex: + mutex_unlock(®->state_mutex); +err_unlock: + mmap_read_unlock(mm); + return ret; +} + +static long obmm_shm_fops_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long ret; + + switch (cmd) { + case OBMM_SHMDEV_UPDATE_RANGE: { + struct obmm_cmd_update_range cmd_update_range; + + ret = (long)copy_from_user(&cmd_update_range, (void __user *)arg, + sizeof(struct obmm_cmd_update_range)); + if (ret) { + pr_err("failed to load update_range argument"); + return -EFAULT; + } + + ret = obmm_shm_update_range(file, &cmd_update_range); + } break; + default: + ret = -ENOTTY; + } + return ret; +} + +const struct file_operations obmm_shm_fops = { .owner = THIS_MODULE, + .unlocked_ioctl = obmm_shm_fops_ioctl, + .mmap = obmm_shm_fops_mmap, + .get_unmapped_area = thp_get_unmapped_area, + .open = obmm_shm_fops_open, + .flush = obmm_shm_fops_flush, + .release = obmm_shm_fops_release }; + +static void obmm_shm_dev_release(struct device *dev) +{ + struct obmm_region *reg = container_of(dev, struct obmm_region, device); + + atomic_set(®->device_released, 1); + module_put(THIS_MODULE); +} + +void wait_until_dev_released(struct obmm_region *reg) +{ + while (atomic_read(®->device_released) == 0) + cpu_relax(); +} + +int obmm_shm_dev_add(struct obmm_region *reg) +{ + int ret; + dev_t devt; + + if (!try_module_get(THIS_MODULE)) { + pr_err("Module is dying. 
Reject all memory requests\n"); + return -EPERM; + } + + reg->mmap_count = 0; + reg->mmap_mode = OBMM_MMAP_INIT; + + devt = MKDEV(MAJOR(obmm_devt), reg->regionid); + cdev_init(®->cdevice, &obmm_shm_fops); + reg->cdevice.owner = THIS_MODULE; + reg->device.devt = devt; + reg->device.release = obmm_shm_dev_release; + reg->device.groups = obmm_region_get_attr_groups(reg); + reg->device.parent = obmm_shm_rootdev; + device_initialize(®->device); + + ret = dev_set_name(®->device, "obmm_shmdev%d", reg->regionid); + if (ret) { + pr_err("Failed to set name for shmdev %d. ret=%pe\n", reg->regionid, ERR_PTR(ret)); + goto err_put_dev; + } + + ret = cdev_device_add(®->cdevice, ®->device); + if (ret) { + pr_err("Failed to add shm device %d. ret=%pe\n", reg->regionid, ERR_PTR(ret)); + goto err_put_dev; + } + + atomic_set(®->device_released, 0); + + return 0; + + /* NOTE: If the device is properly initialized, the refcount of module + * should be maintained by device kobject (and the associated + * obmm_shm_dev_release function). The refcount of region is always + * recovered by kobject-triggered release function. + */ +err_put_dev: + put_device(®->device); + return ret; +} + +void obmm_shm_dev_del(struct obmm_region *reg) +{ + cdev_device_del(®->cdevice, ®->device); + put_device(®->device); +} + +int obmm_shm_dev_init(void) +{ + int ret; + + pr_info("shmdev: root device initialization started\n"); + ret = alloc_chrdev_region(&obmm_devt, OBMM_MIN_VALID_REGIONID, OBMM_REGIONID_MAX_COUNT, + obmm_shm_region_name); + if (ret) { + pr_err("Failed to allocate char device ID. ret=%pe\n", ERR_PTR(ret)); + goto err_reg_alloc; + } + + obmm_shm_rootdev = root_device_register(obmm_shm_rootdev_name); + if (IS_ERR_OR_NULL(obmm_shm_rootdev)) { + pr_err("error register obmm root device\n"); + ret = -ENOMEM; + goto err_rootdev; + } + + pr_info("shmdev: root device initialization completed\n"); + return 0; +err_rootdev: + unregister_chrdev_region(obmm_devt, OBMM_REGIONID_MAX_COUNT); +err_reg_alloc: + return ret; +} + +void obmm_shm_dev_exit(void) +{ + pr_info("shmdev: root device starts shutting down\n"); + root_device_unregister(obmm_shm_rootdev); + unregister_chrdev_region(obmm_devt, OBMM_REGIONID_MAX_COUNT); + pr_info("shmdev: root device shut down completed\n"); +} diff --git a/drivers/ub/obmm/obmm_shm_dev.h b/drivers/ub/obmm/obmm_shm_dev.h new file mode 100644 index 0000000000000000000000000000000000000000..e0bf3553a3ce61f30cb3a829a2ad58439c2be21a --- /dev/null +++ b/drivers/ub/obmm/obmm_shm_dev.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ + +#ifndef OBMM_SHM_DEV_H +#define OBMM_SHM_DEV_H + +#include "obmm_core.h" + +int obmm_shm_dev_init(void); +void obmm_shm_dev_exit(void); +int obmm_shm_dev_add(struct obmm_region *reg); +void obmm_shm_dev_del(struct obmm_region *reg); +void wait_until_dev_released(struct obmm_region *reg); + + +#endif diff --git a/drivers/ub/obmm/obmm_sysfs.h b/drivers/ub/obmm/obmm_sysfs.h new file mode 100644 index 0000000000000000000000000000000000000000..d661b37c406ed9bb7d90747b0d1a537bbaed989b --- /dev/null +++ b/drivers/ub/obmm/obmm_sysfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
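+ * Description: sysfs attribute groups exposed by OBMM region devices.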
+ */ + +#ifndef OBMM_SYSFS_H +#define OBMM_SYSFS_H + +#include "obmm_core.h" + +const struct attribute_group **obmm_region_get_attr_groups(const struct obmm_region *); + +#endif diff --git a/drivers/ub/obmm/obmm_sysfs_ub.c b/drivers/ub/obmm/obmm_sysfs_ub.c new file mode 100644 index 0000000000000000000000000000000000000000..22f8702f64688b7a3a47b24070870a95b5b4786b --- /dev/null +++ b/drivers/ub/obmm/obmm_sysfs_ub.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#include +#include + +#include "obmm_sysfs.h" +#include "obmm_preimport.h" +#include "obmm_import.h" + +static ssize_t size_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + struct obmm_region *region; + + region = container_of(dev, struct obmm_region, device); + return sysfs_emit(buf, "0x%llx\n", region->mem_size); +} +static DEVICE_ATTR_ADMIN_RO(size); + +static const char *get_type_str(const struct obmm_region *region) +{ + return region->type == OBMM_EXPORT_REGION ? "export" : "import"; +} + +/* show some attribute of a region as string */ +#define REGION_ATTR_SHOW(tag) \ + static ssize_t tag##_show(struct device *dev, \ + struct device_attribute *attr __always_unused, char *buf) \ + { \ + struct obmm_region *region; \ + region = container_of(dev, struct obmm_region, device); \ + return sysfs_emit(buf, "%s\n", get_##tag##_str(region)); \ + } \ + static DEVICE_ATTR_ADMIN_RO(tag) + +REGION_ATTR_SHOW(type); + +static ssize_t priv_len_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + struct obmm_region *region; + + region = container_of(dev, struct obmm_region, device); + return sysfs_emit(buf, "%u\n", region->priv_len); +} +static DEVICE_ATTR_ADMIN_RO(priv_len); + +/* binary attribute of the sysfs entry for priv data */ +static ssize_t priv_read(struct file *filp __always_unused, struct kobject *kobj, + struct bin_attribute *bin_attr __always_unused, char *buf, loff_t off, + size_t count) +{ + struct device *dev; + struct obmm_region *region; + + dev = kobj_to_dev(kobj); + region = container_of(dev, struct obmm_region, device); + + if (off + count > OBMM_MAX_PRIV_LEN) + count = OBMM_MAX_PRIV_LEN - off; + memcpy(buf, region->priv + off, count); + + return count; +} + +static struct bin_attribute bin_attr_priv __ro_after_init = { + .attr = { + .name = "priv", + .mode = 0400, + }, + .read = priv_read, + .size = OBMM_MAX_PRIV_LEN, +}; + +/* show some attribute of a region as string */ +#define REGION_FLAG_SHOW(flag) \ + static ssize_t flag##_show(struct device *dev, \ + struct device_attribute *attr __always_unused, char *buf) \ + { \ + struct obmm_region *region; \ + region = container_of(dev, struct obmm_region, device); \ + return sysfs_emit(buf, "%d\n", region_##flag(region)); \ + } \ + static DEVICE_ATTR_ADMIN_RO(flag) + +REGION_FLAG_SHOW(allow_mmap); +REGION_FLAG_SHOW(memory_from_user); +REGION_FLAG_SHOW(preimport); + +/* for export region only */ +static ssize_t node_mem_size_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + unsigned int i; + ssize_t count; + struct obmm_region *reg; + struct obmm_export_region *e_reg; + + reg = container_of(dev, struct obmm_region, device); + e_reg = container_of(reg, struct obmm_export_region, region); + + count = sysfs_emit(buf, "%#llx", e_reg->node_mem_size[0]); + for (i = 1; i < e_reg->node_count; i++) + count += 
sysfs_emit_at(buf, count, ",%#llx", e_reg->node_mem_size[i]); + count += sysfs_emit_at(buf, count, "\n"); + return count; +} +static DEVICE_ATTR_ADMIN_RO(node_mem_size); + +static ssize_t deid_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + struct obmm_import_region *i_reg; + struct obmm_export_region *e_reg; + struct obmm_region *reg; + + reg = container_of(dev, struct obmm_region, device); + if (reg->type == OBMM_EXPORT_REGION) { + e_reg = container_of(reg, struct obmm_export_region, region); + return sysfs_emit(buf, EID_FMT64 "\n", EID_ARGS64_H(e_reg->deid), + EID_ARGS64_L(e_reg->deid)); + } + i_reg = container_of(reg, struct obmm_import_region, region); + return sysfs_emit(buf, EID_FMT64 "\n", EID_ARGS64_H(i_reg->deid), + EID_ARGS64_L(i_reg->deid)); +} +static DEVICE_ATTR_ADMIN_RO(deid); + +static ssize_t seid_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + struct obmm_import_region *i_reg; + struct obmm_region *reg; + + reg = container_of(dev, struct obmm_region, device); + i_reg = container_of(reg, struct obmm_import_region, region); + + return sysfs_emit(buf, EID_FMT64 "\n", EID_ARGS64_H(i_reg->seid), + EID_ARGS64_L(i_reg->seid)); +} +static DEVICE_ATTR_ADMIN_RO(seid); + +#define COMMON_FIELD_SHOW(field, fmt) \ + static ssize_t field##_show(struct device *dev, \ + struct device_attribute *attr __always_unused, char *buf) \ + { \ + struct obmm_region *reg; \ + struct obmm_export_region *e_reg; \ + reg = container_of(dev, struct obmm_region, device); \ + e_reg = container_of(reg, struct obmm_export_region, region); \ + return sysfs_emit(buf, fmt, e_reg->field); \ + } \ + static DEVICE_ATTR_ADMIN_RO(field) + +COMMON_FIELD_SHOW(tokenid, "0x%x\n"); +COMMON_FIELD_SHOW(uba, "0x%llx\n"); + +#define IREG_FIELD_SHOW(field, fmt) \ + static ssize_t field##_show(struct device *dev, \ + struct device_attribute *attr __always_unused, char *buf) \ + { \ + struct obmm_region *reg; \ + struct obmm_import_region *i_reg; \ + reg = container_of(dev, struct obmm_region, device); \ + i_reg = container_of(reg, struct obmm_import_region, region); \ + return sysfs_emit(buf, fmt, i_reg->field); \ + } \ + static DEVICE_ATTR_ADMIN_RO(field) + +IREG_FIELD_SHOW(pa, "0x%llx\n"); +IREG_FIELD_SHOW(numa_id, "%d\n"); +IREG_FIELD_SHOW(dcna, "0x%x\n"); +IREG_FIELD_SHOW(scna, "0x%x\n"); + +static struct attribute *root_attrs[] __ro_after_init = { + &dev_attr_size.attr, + &dev_attr_type.attr, + &dev_attr_priv_len.attr, + &dev_attr_allow_mmap.attr, + NULL, +}; + +static struct bin_attribute *root_bin_attrs[] __ro_after_init = { + &bin_attr_priv, + NULL, +}; + +static struct attribute *import_numa_attrs[] __ro_after_init = { + &dev_attr_numa_id.attr, + &dev_attr_pa.attr, + &dev_attr_dcna.attr, + &dev_attr_scna.attr, + &dev_attr_preimport.attr, + &dev_attr_seid.attr, + &dev_attr_deid.attr, + NULL, +}; +static struct attribute *import_mmap_attrs[] __ro_after_init = { + &dev_attr_pa.attr, + &dev_attr_dcna.attr, + &dev_attr_scna.attr, + &dev_attr_seid.attr, + &dev_attr_deid.attr, + NULL, +}; + +static struct attribute *export_attrs[] __ro_after_init = { + &dev_attr_node_mem_size.attr, + &dev_attr_uba.attr, + &dev_attr_tokenid.attr, + &dev_attr_memory_from_user.attr, + &dev_attr_deid.attr, + NULL, +}; + +static struct attribute_group root_attrs_group __ro_after_init = { + .name = NULL, + .attrs = root_attrs, + .bin_attrs = root_bin_attrs, +}; + +#define SYSFS_NUMA_REMOTE 1U + +static unsigned int get_import_region_sysfs_index(const struct 
obmm_region *region) +{ + unsigned int index = 0; + + if (region_numa_remote(region)) + index |= SYSFS_NUMA_REMOTE; + + return index; +} + +static const struct attribute_group import_attrs_groups[] = { + [0] = { + .name = "import_info", + .attrs = import_mmap_attrs, + }, + [SYSFS_NUMA_REMOTE] = { + .name = "import_info", + .attrs = import_numa_attrs, + }, +}; + +static const struct attribute_group export_attrs_group = { + .name = "export_info", + .attrs = export_attrs, +}; + +static const struct attribute_group *obmm_import_attrs_groups_list[][3] = { + { &root_attrs_group, &import_attrs_groups[0], NULL }, + { &root_attrs_group, &import_attrs_groups[1], NULL }, + { &root_attrs_group, &import_attrs_groups[2], NULL }, + { &root_attrs_group, &import_attrs_groups[3], NULL }, +}; + +static const struct attribute_group *obmm_export_attrs_groups[] = { + &root_attrs_group, + &export_attrs_group, + NULL, +}; + +const struct attribute_group **obmm_region_get_attr_groups(const struct obmm_region *region) +{ + unsigned int index; + + if (region->type == OBMM_EXPORT_REGION) + return obmm_export_attrs_groups; + index = get_import_region_sysfs_index(region); + return obmm_import_attrs_groups_list[index]; +} diff --git a/drivers/ub/obmm/ubmempool_allocator.c b/drivers/ub/obmm/ubmempool_allocator.c new file mode 100644 index 0000000000000000000000000000000000000000..da6094288f7b2a467f9759cda9ba6b01e9f784e2 --- /dev/null +++ b/drivers/ub/obmm/ubmempool_allocator.c @@ -0,0 +1,671 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "obmm_core.h" +#include "conti_mem_allocator.h" +#include "obmm_cache.h" +#include "ubmempool_allocator.h" + +#define MAX_DEFAULT_PARAM_LENGTH 20 +static char default_mempool_size[MAX_DEFAULT_PARAM_LENGTH] = "1G"; +static char *mempool_size = default_mempool_size; +module_param(mempool_size, charp, 0440); +MODULE_PARM_DESC(mempool_size, "Max aviliable cached memory total."); + +/* contract 1T when receive lowmem notify */ +static size_t mempool_contract_size = 1ULL << 40; + +static int mempool_refill_timeout = 30000; +module_param(mempool_refill_timeout, int, 0440); +MODULE_PARM_DESC(mempool_refill_timeout, + "After detecting a memory shortage, attempt to expand the memory pool again after a period of time."); + +struct mem_allocator { + struct timer_list refill_timer; + struct conti_mem_allocator allocator; + size_t pool_size; + bool can_expand; +}; + +static inline struct mem_allocator *refill_timer_to_mem_allocator(struct timer_list *timer) +{ + return container_of(timer, struct mem_allocator, refill_timer); +} +static inline struct mem_allocator * +conti_mem_to_mem_allocator(struct conti_mem_allocator *allocator) +{ + return container_of(allocator, struct mem_allocator, allocator); +} + +static void refill_timeout(struct timer_list *timer) +{ + struct mem_allocator *m = refill_timer_to_mem_allocator(timer); + + m->can_expand = true; +} + +static struct mem_allocator mem_allocators[OBMM_MAX_LOCAL_NUMA_NODES]; + +static void pool_delay_expand(int nid) +{ + if (is_online_local_node(nid) && mem_allocators[nid].allocator.initialized) { + mem_allocators[nid].can_expand = false; + mem_allocators[nid].refill_timer.expires = + jiffies + msecs_to_jiffies(mempool_refill_timeout); + 
mod_timer(&mem_allocators[nid].refill_timer, + mem_allocators[nid].refill_timer.expires); + } +} + +void free_memory_contiguous(struct mem_description_pool *desc) +{ + int i; + + for (i = 0; i < OBMM_MAX_LOCAL_NUMA_NODES; i++) { + if (list_empty(&desc->head[i])) + continue; + + conti_free_memory(&mem_allocators[i].allocator, &desc->head[i]); + } +} + +static int clear_block(struct conti_mem_allocator *a __always_unused, struct memseg_node *p) +{ + phys_addr_t pa = p->addr; + size_t size = p->size; + void *va; + int ret; + + ret = set_linear_mapping_invalid(pa >> PAGE_SHIFT, (pa + size) >> PAGE_SHIFT, false); + if (ret < 0) { + pr_err_ratelimited("%s: error setting kernel pagetable; set_inval=false", __func__); + return ret; + } + va = ioremap_cache(pa, size); + if (!va) { + pr_err_ratelimited("%s: failed to run ioremap.\n", __func__); + goto out_recover_kernel_pagetable; + } + memset(va, 0, size); + iounmap(va); + /* flush cache after set_linear_mapping_invalid */ + ret = set_linear_mapping_invalid(pa >> PAGE_SHIFT, (pa + size) >> PAGE_SHIFT, true); + if (ret < 0) { + pr_err_ratelimited("%s: error setting kernel pagetable; set_inval=false", __func__); + return ret; + } + ret = flush_cache_by_pa(pa, size, OBMM_SHM_CACHE_WB_INVAL); + if (ret) { + pr_err_ratelimited("%s: failed to flush cache: %d; retval=%d\n", __func__, + OBMM_SHM_CACHE_INVAL, ret); + return ret; + } + return 0; +out_recover_kernel_pagetable: + WARN_ON(set_linear_mapping_invalid(pa >> PAGE_SHIFT, (pa + size) >> PAGE_SHIFT, true)); + return ret; +} + +int allocate_memory_contiguous(uint64_t size[], int length, struct mem_description_pool *desc, + bool zero, bool allow_slow) +{ + int i, ret; + struct list_head head; + size_t allocated; + + for (i = 0; i < OBMM_MAX_LOCAL_NUMA_NODES; i++) + INIT_LIST_HEAD(&desc->head[i]); + + for (i = 0; i < length; i++) { + INIT_LIST_HEAD(&head); + if (size[i] == 0) + continue; + if (!mem_allocators[i].allocator.initialized) { + pr_err("%s:no allocator working on node %d\n", __func__, i); + ret = -ENODEV; + goto err_free_memory; + } + if (size[i] % mem_allocators[i].allocator.granu) { + pr_err("%s: size %#llx not aligned to allocator granu %#lx on node %d\n", + __func__, size[i], mem_allocators[i].allocator.granu, i); + ret = -EINVAL; + goto err_free_memory; + } + allocated = conti_alloc_memory(&mem_allocators[i].allocator, size[i], &head, zero, + allow_slow); + list_replace(&head, &desc->head[i]); + if (allocated < size[i]) { + pr_err("%s: failed to alloc %#llx bytes for node %d\n", __func__, size[i], + i); + ret = -ENOMEM; + goto err_free_memory; + } + } + + return 0; +err_free_memory: + free_memory_contiguous(desc); + return ret; +} + +static size_t cma_contract_size(struct conti_mem_allocator *a) +{ + struct mem_allocator *m = conti_mem_to_mem_allocator(a); + ssize_t size = (ssize_t)(ALIGN(conti_get_avail(a) - m->pool_size, a->granu)); + + return size > 0 ? size : 0; +} + +static bool cma_need_contract(struct conti_mem_allocator *a) +{ + return cma_contract_size(a) > 0; +} + +static size_t cma_expand_size(struct conti_mem_allocator *a) +{ + struct mem_allocator *m = conti_mem_to_mem_allocator(a); + ssize_t size = (ssize_t)ALIGN_DOWN(m->pool_size - conti_get_avail(a), a->granu); + + return size > 0 ? 
size : 0; +} + +static bool cma_need_expand(struct conti_mem_allocator *a) +{ + struct mem_allocator *m = conti_mem_to_mem_allocator(a); + + if (m->can_expand) + return cma_expand_size(a) > 0; + return false; +} + +static int set_memseg_linear_mapping_invalid(struct memseg_node *node, bool set_nc) +{ + int ret; + unsigned long start_pfn, end_pfn; + + start_pfn = PHYS_PFN(node->addr); + end_pfn = PHYS_PFN(node->addr + node->size); + pr_debug("call external: set_linear_mapping_invalid(set_nc=%d)\n", set_nc); + ret = set_linear_mapping_invalid(start_pfn, end_pfn, set_nc); + if (ret) { + pr_err("failed to update kernel linear mapping cacheability: error=%pe.\n", + ERR_PTR(ret)); + return ret; + } + pr_debug("external called: set_linear_mapping_invalid(set_nc=%d, ret=%pe)\n", + set_nc, ERR_PTR(ret)); + return 0; +} + +static struct memseg_node *hugetlb_pmd_alloc_memseg(struct conti_mem_allocator *a) +{ + int ret; + struct folio *folio; + struct memseg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (unlikely(!node)) + return NULL; + + folio = hugetlb_pool_alloc_size(a->nid, PMD_SIZE); + if (IS_ERR(folio)) { + pr_debug("hugetlb_pool_alloc(%d) returned %ld.\n", a->nid, PTR_ERR(folio)); + pool_delay_expand(a->nid); + goto out_free_node; + } + + node->size = folio_size(folio); + node->addr = folio_pfn(folio) << PAGE_SHIFT; + if (unlikely(node->size != a->granu)) { + pr_err("hugetlb_pool_alloc() returned folio smaller than expected, expect=%#zx, actual=%#zx\n", + a->granu, node->size); + goto out_free_seg; + } + + ret = set_memseg_linear_mapping_invalid(node, true); + if (unlikely(ret)) + goto out_free_seg; + + return node; + +out_free_seg: + ret = hugetlb_pool_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free hugetlb folio on roll back, retval=%d.\n", ret); +out_free_node: + kfree(node); + return NULL; +} + +static void hugetlb_free_memseg(struct conti_mem_allocator *a __always_unused, + struct memseg_node *node) +{ + int ret; + struct folio *folio; + + if (unlikely(!node)) { + pr_err("attempted to free NULL hugetlb memseg.\n"); + return; + } + + folio = pfn_folio(node->addr >> PAGE_SHIFT); + + set_memseg_linear_mapping_invalid(node, false); + + ret = hugetlb_pool_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free hugetlb folio, ret=%pe.\n", ERR_PTR(ret)); + + kfree(node); +} + +static struct memseg_node *hugetlb_pud_alloc_memseg(struct conti_mem_allocator *a) +{ + int ret; + struct folio *folio; + struct memseg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (unlikely(!node)) + return NULL; + + folio = hugetlb_pool_alloc_size(a->nid, PUD_SIZE); + if (IS_ERR(folio)) { + pr_debug("hugetlb_pool_alloc(%d) returned %ld.\n", a->nid, PTR_ERR(folio)); + pool_delay_expand(a->nid); + goto out_free_node; + } + + node->size = folio_size(folio); + node->addr = folio_pfn(folio) << PAGE_SHIFT; + if (unlikely(node->size != a->granu)) { + pr_err("hugetlb_pool_alloc() returned folio smaller than expected, expect=%#zx, actual=%#zx\n", + a->granu, node->size); + goto out_free_seg; + } + + ret = set_memseg_linear_mapping_invalid(node, true); + if (unlikely(ret)) + goto out_free_seg; + + return node; + +out_free_seg: + ret = hugetlb_pool_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free hugetlb folio on roll back, ret=%pe.\n", ERR_PTR(ret)); +out_free_node: + kfree(node); + return NULL; +} + +static void buddy_free_memseg(struct conti_mem_allocator *a __always_unused, + struct memseg_node *node) +{ + int ret; + struct folio *folio; + + if (unlikely(!node)) { + 
pr_err("attempted to free NULL buddy memseg.\n"); + return; + } + + folio = pfn_folio(node->addr >> PAGE_SHIFT); + + set_memseg_linear_mapping_invalid(node, false); + + ret = pfn_range_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free buddy folio, ret=%pe.\n", ERR_PTR(ret)); + + kfree(node); +} + +static struct memseg_node *buddy_alloc_memseg(struct conti_mem_allocator *a) +{ + int ret; + struct folio *folio; + struct memseg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (unlikely(!node)) + return NULL; + + folio = pfn_range_alloc(a->granu >> PAGE_SHIFT, a->nid); + if (IS_ERR(folio)) { + pr_debug("pfn_range_alloc(%#lx, %d) returned %ld.\n", a->granu >> PAGE_SHIFT, + a->nid, PTR_ERR(folio)); + pool_delay_expand(a->nid); + goto out_free_node; + } + + node->size = folio_size(folio); + node->addr = folio_pfn(folio) << PAGE_SHIFT; + if (unlikely(node->size != a->granu)) { + pr_err("pfn_range_alloc() returned folio smaller than expected, expect=%#zx, actual=%#zx\n", + a->granu, node->size); + goto out_free_seg; + } + + ret = set_memseg_linear_mapping_invalid(node, true); + if (unlikely(ret)) + goto out_free_seg; + + return node; + +out_free_seg: + ret = pfn_range_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free buddy folio on roll back, ret=%pe.\n", ERR_PTR(ret)); +out_free_node: + kfree(node); + return NULL; +} + +static struct conti_mempool_ops hugetlb_pmd_ops = { + .clear_memseg = clear_block, + .pool_free_memseg = hugetlb_free_memseg, + .pool_alloc_memseg = hugetlb_pmd_alloc_memseg, + .need_contract = cma_need_contract, + .contract_size = cma_contract_size, + .need_expand = cma_need_expand, + .expand_size = cma_expand_size, +}; + +static struct conti_mempool_ops hugetlb_pud_ops = { + .clear_memseg = clear_block, + .pool_free_memseg = hugetlb_free_memseg, + .pool_alloc_memseg = hugetlb_pud_alloc_memseg, + .need_contract = cma_need_contract, + .contract_size = cma_contract_size, + .need_expand = cma_need_expand, + .expand_size = cma_expand_size, +}; + +static struct conti_mempool_ops buddy_ops = { + .clear_memseg = clear_block, + .pool_free_memseg = buddy_free_memseg, + .pool_alloc_memseg = buddy_alloc_memseg, + .need_contract = cma_need_contract, + .contract_size = cma_contract_size, + .need_expand = cma_need_expand, + .expand_size = cma_expand_size, +}; + +static void mem_allocator_uninit_one(int nid) +{ + conti_mem_allocator_deinit(&mem_allocators[nid].allocator); + timer_shutdown_sync(&mem_allocators[nid].refill_timer); +} + +static char default_mempool_allocator[MAX_DEFAULT_PARAM_LENGTH] = "buddy_highmem"; +static char *mempool_allocator = default_mempool_allocator; +module_param(mempool_allocator, charp, 0440); +MODULE_PARM_DESC(mempool_allocator, + "OBMM mempool allocator. 
If not set, use buddy_highmem allocator."); +static const char * const allocator_names[] = { + "hugetlb_pmd", + "hugetlb_pud", + "buddy_highmem", +}; +static const struct conti_mempool_ops *allocator_ops[] = { + &hugetlb_pmd_ops, + &hugetlb_pud_ops, + &buddy_ops, + NULL +}; +enum allocator_id { + ALLOCATOR_HUGETLB_PMD = 0, + ALLOCATOR_HUGETLB_PUD, + ALLOCATOR_BUDDY_HIGHMEM, + ALLOCATOR_MAX +}; + +static char *mem_allocator_granu; +module_param(mem_allocator_granu, charp, 0440); +MODULE_PARM_DESC(mem_allocator_granu, "Basic granu of obmm memory allocator."); + +static int select_mem_allocator(void) +{ + int i; + + for (i = 0; i < ALLOCATOR_MAX; i++) { + if (!strcmp(allocator_names[i], mempool_allocator)) + break; + } + if (i == ALLOCATOR_MAX) { + pr_err("invalid mem allocator specified: %s\n", mempool_allocator); + return ALLOCATOR_MAX; + } + if (contig_mem_pool_percent != 100 && i == ALLOCATOR_HUGETLB_PMD) { + pr_err("using allocator %s when pmd_mapping not 100%%\n", allocator_names[i]); + i = ALLOCATOR_MAX; + } + + return i; +} + +static void print_granu(char s[MAX_DEFAULT_PARAM_LENGTH], size_t granu) +{ + static const char * const units[] = {"", "K", "M", "G", "T"}; + int i = 0; + + while (granu >= 1024 && i < ARRAY_SIZE(units) - 1) { + granu >>= 10; + i++; + } + snprintf(s, MAX_DEFAULT_PARAM_LENGTH, "%lu%s", granu, units[i]); +} + +static int init_mem_allocator_granu(enum allocator_id aid) +{ + static char def_granu[MAX_DEFAULT_PARAM_LENGTH]; + char *p = mem_allocator_granu; + + if (!mem_allocator_granu) { + if (aid == ALLOCATOR_HUGETLB_PUD) + __obmm_memseg_size = PUD_SIZE; + else + __obmm_memseg_size = PMD_SIZE; + + print_granu(def_granu, __obmm_memseg_size); + mem_allocator_granu = def_granu; + return 0; + } + + __obmm_memseg_size = memparse(p, &p); + if (*p != '\0') { + pr_err("invalid mem_allocator_granu: %s\n", mem_allocator_granu); + return -EINVAL; + } + if (__obmm_memseg_size < OBMM_BASIC_GRANU || __obmm_memseg_size % OBMM_BASIC_GRANU) { + pr_err("unsupported mem_allocator_granu: %s\n", mem_allocator_granu); + return -EINVAL; + } + if (aid == ALLOCATOR_HUGETLB_PUD && __obmm_memseg_size != PUD_SIZE) { + pr_err("unsupported mem_allocator_granu for hugetlb_pud allocator: %s\n", + mem_allocator_granu); + return -EINVAL; + } + if (aid == ALLOCATOR_HUGETLB_PMD && __obmm_memseg_size != PMD_SIZE) { + pr_err("unsupported mem_allocator_granu for hugetlb_pmd allocator: %s\n", + mem_allocator_granu); + return -EINVAL; + } + + return 0; +} + +static int mem_allocator_init_one(int nid, enum allocator_id aid) +{ + struct conti_mem_allocator *allocator = &mem_allocators[nid].allocator; + int ret; + + mem_allocators[nid].can_expand = true; + timer_setup(&mem_allocators[nid].refill_timer, refill_timeout, 0); + + ret = conti_mem_allocator_init(allocator, nid, OBMM_MEMSEG_SIZE, allocator_ops[aid], + "%s/%d", allocator_names[aid], nid); + if (ret) + goto err_del_timer; + + return 0; +err_del_timer: + timer_shutdown_sync(&mem_allocators[nid].refill_timer); + return ret; +} + +size_t ubmempool_contract(int nid, bool is_hugepage) +{ + struct conti_mem_allocator *a; + bool is_hugetlb_pool; + + if (!is_online_local_node(nid)) { + pr_err_ratelimited( + "obmm tried to contract to alleviate lowmem, but nid(%d) is not valid.\n", + nid); + return 0; + } + + pr_debug_ratelimited("contract memory on nid: %d, is_hugepage: %d\n", nid, is_hugepage); + /* try to contract memory only when it is helpful */ + a = &mem_allocators[nid].allocator; + is_hugetlb_pool = a->ops == &hugetlb_pmd_ops || a->ops == 
&hugetlb_pud_ops; + if (is_hugepage == is_hugetlb_pool) { + pool_delay_expand(nid); + return conti_mem_allocator_contract(a, mempool_contract_size); + } + pr_debug_ratelimited("Not contracting;\n"); + + return 0; +} + +static int mempool_size_parse(char *p, int local_cnt) +{ + int nid; + size_t mempool = memparse(p, &p); + + if (local_cnt == 0) { + pr_err("There is no local numa, failed\n"); + return -EINVAL; + } + + for_each_online_local_node(nid) { + mem_allocators[nid].pool_size = div64_ul(mempool, local_cnt); + pr_info("nid=%d, pool size = %#lx bytes.\n", nid, mem_allocators[nid].pool_size); + } + return 0; +} + +/* Run in IRQ context. */ +static int handle_ghes_mem_ras(struct notifier_block *nb __always_unused, + unsigned long severity __always_unused, void *data) +{ + const struct cper_sec_mem_err *mem_err = (const struct cper_sec_mem_err *)data; + unsigned long pfn; + int nid; + + /* A defensive check for future Linux version. Currently GHES are + * handled in IRQ context. If it switches to NMI context, this handler + * no longer works. + */ + if (WARN_ON_ONCE(in_nmi())) + return NOTIFY_DONE; + + if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) + return NOTIFY_DONE; + + pfn = PHYS_PFN(mem_err->physical_addr); + if (!pfn_valid(pfn)) + return NOTIFY_DONE; + + nid = page_to_nid(pfn_to_page(pfn)); + if (!is_online_local_node(nid)) + return NOTIFY_DONE; + + (void)conti_mem_allocator_isolate_memseg(&mem_allocators[nid].allocator, + mem_err->physical_addr); + return NOTIFY_OK; +} + +static struct notifier_block ghes_mem_ras_notifier_block = { + .notifier_call = handle_ghes_mem_ras, + .priority = 0, +}; + +int ubmempool_allocator_init(void) +{ + int i, j, nid, ret = 0, local_cnt = 0; + enum allocator_id aid; + + for_each_online_local_node(nid) { + if (nid >= OBMM_MAX_LOCAL_NUMA_NODES) { + /* be no mem_allocators[nid] is out of range */ + pr_err("Too many local NUMA nodes. OBMM rebuild required.\n"); + return -EOPNOTSUPP; + } + local_cnt++; + } + + memset(mem_allocators, 0, sizeof(struct mem_allocator) * OBMM_MAX_LOCAL_NUMA_NODES); + if (mempool_size) + ret = mempool_size_parse(mempool_size, local_cnt); + + if (ret) { + pr_err("memory allocator init failed.\n"); + return ret; + } + + aid = select_mem_allocator(); + if (aid == ALLOCATOR_MAX) + return -EINVAL; + + ret = init_mem_allocator_granu(aid); + if (ret) + return ret; + + for_each_online_local_node(i) { + ret = mem_allocator_init_one(i, aid); + if (ret) + goto failed; + } + + ghes_register_report_chain(&ghes_mem_ras_notifier_block); + + return 0; + +failed: + for_each_online_local_node(j) { + if (j < i) + mem_allocator_uninit_one(j); + } + + return ret; +} + +void ubmempool_allocator_exit(void) +{ + int i; + + ghes_unregister_report_chain(&ghes_mem_ras_notifier_block); + + for_each_online_local_node(i) { + if (!mem_allocators[i].allocator.initialized) + continue; + + mem_allocator_uninit_one(i); + } +} diff --git a/drivers/ub/obmm/ubmempool_allocator.h b/drivers/ub/obmm/ubmempool_allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..7c17eb7cf85c9cea6bc584633becdb55943ee44d --- /dev/null +++ b/drivers/ub/obmm/ubmempool_allocator.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description:OBMM Framework's implementations. 
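 *
 * Usage sketch for the entry points declared below, assuming the default
 * PMD-sized granule on 4 KiB pages. struct mem_description_pool and
 * OBMM_MAX_LOCAL_NUMA_NODES come from obmm_core.h, SZ_2M from linux/sizes.h,
 * and the flag meanings (zero = true, allow_slow = false) follow
 * allocate_memory_contiguous() in ubmempool_allocator.c; the surrounding
 * caller is illustrative only:
 *
 *   uint64_t size[OBMM_MAX_LOCAL_NUMA_NODES] = { [0] = SZ_2M };
 *   struct mem_description_pool desc;
 *   int ret;
 *
 *   ret = allocate_memory_contiguous(size, OBMM_MAX_LOCAL_NUMA_NODES, &desc,
 *                                    true, false);
 *   if (!ret) {
 *           ... consume the memseg_node entries queued on desc->head[0] ...
 *           free_memory_contiguous(&desc);
 *   }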
+ */ +#ifndef UBMEMPOOL_ALLOCATOR_H +#define UBMEMPOOL_ALLOCATOR_H + +#include "obmm_core.h" + +void free_memory_contiguous(struct mem_description_pool *desc); + +int allocate_memory_contiguous(uint64_t size[], int length, struct mem_description_pool *desc, + bool zero, bool allow_slow); + +size_t ubmempool_contract(int nid, bool is_hugepage); + +int ubmempool_allocator_init(void); +void ubmempool_allocator_exit(void); + +#endif diff --git a/drivers/ub/sentry/Kconfig b/drivers/ub/sentry/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..9b2fad6d291a17708b3c6273f6fd79d06088f22d --- /dev/null +++ b/drivers/ub/sentry/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config UB_SENTRY + tristate "sentry message report" + depends on UB && ACPI_POWER_NOTIFIER_CHAIN + default m + help + Listens to kernel events (e.g. OOM) and sends sentry messages to userspace. + Provides a device for userspace to read kernel messages and reply with an ack. + Provides a kernel API to send a message to userspace and wait for the result. + +config UB_SENTRY_REMOTE + tristate "sentry remote event reporter module" + depends on UB_SENTRY && UB_URMA + default m + help + Report panic/reboot event messages to remote nodes. diff --git a/drivers/ub/sentry/Makefile b/drivers/ub/sentry/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..0c084b61a2bc9788a0e48979198fefd3894ed9ba --- /dev/null +++ b/drivers/ub/sentry/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the sentry drivers. + +obj-$(CONFIG_UB_SENTRY) += sentry_msg_helper.o +obj-$(CONFIG_UB_SENTRY) += sentry_reporter.o +obj-$(CONFIG_UB_SENTRY_REMOTE) += sentry_urma_comm.o +obj-$(CONFIG_UB_SENTRY_REMOTE) += sentry_uvb_comm.o +obj-$(CONFIG_UB_SENTRY_REMOTE) += sentry_remote_reporter.o + +sentry_msg_helper-y := smh_core.o smh_message.o +sentry_remote_reporter-y := sentry_remote_server.o sentry_remote_client.o diff --git a/drivers/ub/sentry/sentry_remote_client.c b/drivers/ub/sentry/sentry_remote_client.c new file mode 100644 index 0000000000000000000000000000000000000000..ed4ac35cc718b432cf32d0fc77c3ef92eb52e658 --- /dev/null +++ b/drivers/ub/sentry/sentry_remote_client.c @@ -0,0 +1,1121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description: Client module, used for reporting panic or reboot events.
+ * Author: sxt1001 + * Create: 2025-03-18 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smh_message.h" +#include "sentry_remote_reporter.h" + +#define PANIC_TIMEOUT_MS_MIN 0 +#define PANIC_TIMEOUT_MS_MAX 3600000 +#define KERNEL_REBOOT_TIMEOUT_MS_MIN 0 +#define KERNEL_REBOOT_TIMEOUT_MS_MAX 3600000 +#define LOCAL_EID_MAX_LEN (EID_MAX_LEN * 2 + 1 + 1) + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][remote client]: " fmt + +struct sentry_client_context { + char eid_str[MAX_DIE_NUM][EID_MAX_LEN]; + char eid_raw_str[LOCAL_EID_MAX_LEN]; /* for proc show */ + union ubcore_eid eid[MAX_DIE_NUM]; + int die_num_configured; + + struct proc_dir_entry *panic_proc_dir; + char **msg_str; + + unsigned long panic_timeout_ms; + unsigned long kernel_reboot_timeout_ms; + + bool panic_enable; + bool kernel_reboot_enable; + bool use_uvb; + bool use_urma; + + bool is_in_panic_status; + + uint32_t random_id; + + bool is_uvb_cis_func_registered; +}; + +static struct sentry_client_context sentry_client_ctx = { + .die_num_configured = MAX_DIE_NUM, + .panic_timeout_ms = 35000, + .kernel_reboot_timeout_ms = 35000, + .panic_enable = false, + .kernel_reboot_enable = false, + .use_uvb = true, + .use_urma = true, + .is_in_panic_status = false, + .random_id = 0, + .is_uvb_cis_func_registered = false, +}; + +/** + * strcmp_local_eid_from_msg - Compare message EID with local EIDs + * @msg_eid: EID from message to compare + * + * Return: true if EID matches a local EID, false otherwise + * + * This function checks if the provided EID matches any of the + * configured local EIDs. + */ +static bool strcmp_local_eid_from_msg(const char *msg_eid) +{ + for (int i = 0; i < sentry_client_ctx.die_num_configured; i++) { + if (strlen(sentry_client_ctx.eid_str[i]) == 0) { + pr_err("local_eid should have %d values, but %d-th value is empty\n", + sentry_client_ctx.die_num_configured, i); + break; + } + if (strncmp(msg_eid, sentry_client_ctx.eid_str[i], EID_MAX_LEN) == 0) + return true; + } + return false; +} + +/** + * get_ack_done - Check if acknowledgment is complete for local node + * @msg: Message to check + * @ack_type: Expected acknowledgment type + * @comm_type: Communication type + * + * Return: true if acknowledgment is complete, false otherwise + * + * This function verifies if the received acknowledgment message + * matches the expected parameters for the local node. + */ +static bool get_ack_done(const struct sentry_msg_helper_msg *msg, + enum sentry_msg_helper_msg_type ack_type, + enum SENTRY_REMOTE_COMM_TYPE comm_type) +{ + if (msg->type == ack_type && + msg->helper_msg_info.remote_info.cna == g_local_cna && + strcmp_local_eid_from_msg(msg->helper_msg_info.remote_info.eid)) { + pr_info("Receive ack message%s: [%d_%u_%s_%lu]\n", + (comm_type == COMM_TYPE_URMA) ? " from URMA" : + (comm_type == COMM_TYPE_UVB) ? " from UVB" : "", + msg->type, + g_local_cna, + msg->helper_msg_info.remote_info.eid, + msg->res); + return true; + } + return false; +} + +/** + * remote_event_handler - Handle remote event sending and acknowledgment + * @remote_type: Type of remote event + * @timeout_ms: Timeout in milliseconds + * + * Return: 0 on success, negative error code on failure + * + * This function handles the sending of remote events (panic/reboot) and + * waits for acknowledgments from remote nodes, supporting both URMA and UVB. 
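 *
 * The event itself is a flat string built with the "%d_%u_%s_%lu_%u" format
 * used below, i.e. "<type>_<cna>_<eid>_<timeout_ms>_<random_id>". A sketch of
 * how one panic message is assembled (the numeric value of SMH_MESSAGE_PANIC
 * comes from smh_message.h outside this hunk, and only the first configured
 * die's EID is shown):
 *
 *   char buf[URMA_SEND_DATA_MAX_LEN];
 *
 *   snprintf(buf, sizeof(buf) - 1, "%d_%u_%s_%lu_%u",
 *            SMH_MESSAGE_PANIC, g_local_cna, sentry_client_ctx.eid_str[0],
 *            sentry_client_ctx.panic_timeout_ms, sentry_client_ctx.random_id);
 *
 * Acks coming back over URMA are parsed with convert_str_to_smh_msg() and
 * matched against the local CNA/EID list in get_ack_done() above.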
+ */ +int remote_event_handler(enum sentry_msg_helper_msg_type remote_type, + unsigned long timeout_ms) +{ + int ret; + bool uvb_send_success = false; + bool urma_send_success = false; + enum sentry_msg_helper_msg_type remote_ack_type; + char send_data[MAX_DIE_NUM][URMA_SEND_DATA_MAX_LEN]; + uint64_t start_count, current_count; + uint64_t code_run_count, code_run_times_ms; + uint64_t counts_per_sec = arch_timer_get_cntfrq(); + uint64_t timeout_counts = timeout_ms / 1000 * counts_per_sec; + bool ack_done = false; + int recv_msg_nodes; + int times = timeout_ms / MILLISECONDS_OF_EACH_MDELAY; + + /* Prepare send data for each die */ + for (int i = 0; i < sentry_client_ctx.die_num_configured; i++) { + if (strlen(sentry_client_ctx.eid_str[i]) == 0) { + pr_err("local_eid should have %d values, but %d-th value is empty\n", + sentry_client_ctx.die_num_configured, i); + return NOTIFY_OK; + } + + ret = snprintf(send_data[i], URMA_SEND_DATA_MAX_LEN - 1, + "%d_%u_%s_%lu_%u", remote_type, g_local_cna, + sentry_client_ctx.eid_str[i], timeout_ms, + sentry_client_ctx.random_id); + if ((size_t)ret >= URMA_SEND_DATA_MAX_LEN - 1) { + pr_err("msg str size exceeds the max value\n"); + return NOTIFY_OK; + } + } + + remote_ack_type = get_ack_type(remote_type); + if (remote_ack_type == SMH_MESSAGE_UNKNOWN) + return -EINVAL; + + start_count = read_sysreg(cntpct_el0); + + /* Main event sending and acknowledgment loop */ + for (int i = 0; i < times; i++) { + current_count = read_sysreg(cntpct_el0); + if (current_count - start_count >= timeout_counts) + break; + + /* Send via URMA if enabled */ + if (sentry_client_ctx.use_urma) { + for (int j = 0; j < sentry_client_ctx.die_num_configured; j++) { + if (strlen(sentry_client_ctx.eid_str[j]) == 0) + break; + + ret = urma_send(send_data[j], sizeof(send_data[j]), NULL, j); + if (ret > 0) { + urma_send_success = true; + pr_info("URMA send data [%s] [%d]: SUCCESS. die index %d\n", + send_data[j], i + 1, j); + } + } + } + + /* Send via UVB if enabled */ + if (sentry_client_ctx.use_uvb) { + ret = uvb_send(send_data[0], -1, + sentry_client_ctx.is_in_panic_status ? 
true : false); + if (ret > 0) { + uvb_send_success = true; + pr_info("UVB send data [%s] [%d]: SUCCESS\n", send_data[0], i + 1); + } + } + + /* Handle send failure */ + if (!urma_send_success && !uvb_send_success) { + pr_warn("UVB && URMA send data [%s]: FAILED\n", send_data[0]); + if (sentry_client_ctx.is_in_panic_status) + mdelay(MILLISECONDS_OF_EACH_MDELAY); + else + msleep(MILLISECONDS_OF_EACH_MDELAY); + continue; + } + + if (!sentry_client_ctx.is_in_panic_status) { + /* Not in panic status, check shared buffer */ + if (atomic_read(&sentry_remote_ctx.remote_event_ack_done) != 1) { + msleep(MILLISECONDS_OF_EACH_MDELAY); + continue; + } + + spin_lock(&sentry_buf_lock); + ack_done = get_ack_done(&sentry_remote_ctx.remote_event_ack_msg_buf, + remote_ack_type, COMM_TYPE_UNKNOWN); + spin_unlock(&sentry_buf_lock); + goto check_ack_and_sleep; + } + /* Handle acknowledgment in panic mode */ + if (uvb_send_success) { + /* In panic status, UVB uses sync mode */ + void *data = NULL; + ret = uvb_polling_sync(data); + + if (ret < 0 && ret != -ETIMEDOUT) { + pr_err("uvb_poll_window_sync failed\n"); + } else if (ret == -ETIMEDOUT) { + pr_info("uvb_polling_sync timeout\n"); + } else if (ret == 0) { + /* uvb_polling_sync success */ + if (atomic_read(&sentry_remote_ctx.remote_event_ack_done) != 1) + goto do_urma_recv; + + spin_lock(&sentry_buf_lock); + ack_done = get_ack_done(&sentry_remote_ctx.remote_event_ack_msg_buf, + remote_ack_type, COMM_TYPE_UVB); + spin_unlock(&sentry_buf_lock); + } + } + +do_urma_recv: + if (urma_send_success) { + /* In panic status, poll URMA directly */ + recv_msg_nodes = urma_recv(sentry_client_ctx.msg_str, + URMA_SEND_DATA_MAX_LEN); + if (recv_msg_nodes <= 0) + goto check_ack_and_sleep; + pr_info("urma received %d nodes\n", recv_msg_nodes); + for (int l = 0; l < recv_msg_nodes; l++) { + struct sentry_msg_helper_msg msg; + uint32_t random_id_stub; + + if (strcmp(HEARTBEAT, sentry_client_ctx.msg_str[l]) == 0 || + strcmp(HEARTBEAT_ACK, sentry_client_ctx.msg_str[l]) == 0) + continue; + + /* Convert and check acknowledgment */ + ret = convert_str_to_smh_msg(sentry_client_ctx.msg_str[l], + &msg, &random_id_stub); + if (ret) { + pr_warn("convert urma data failed: [%s]\n", + sentry_client_ctx.msg_str[l]); + continue; + } + ack_done = get_ack_done(&msg, remote_ack_type, + COMM_TYPE_URMA); + if (ack_done) + break; + } + } + +check_ack_and_sleep: + /* Check if acknowledgment received */ + if (ack_done) { + pr_info("Receive ack message, stop blocking early\n"); + break; + } + + pr_debug("No ACK for %d polling, wait %d ms\n", + i, MILLISECONDS_OF_EACH_MDELAY); + + /* Calculate precise sleep time */ + code_run_count = read_sysreg(cntpct_el0) - current_count; + code_run_times_ms = code_run_count * 1000 / counts_per_sec; + + if (code_run_times_ms < MILLISECONDS_OF_EACH_MDELAY) { + int sleep_time = MILLISECONDS_OF_EACH_MDELAY - code_run_times_ms; + if (sentry_client_ctx.is_in_panic_status) + mdelay(sleep_time); + else + msleep(sleep_time); + } + } + + return 0; +} + +/** + * check_if_eid_cna_is_set - Check if EID and CNA are properly configured + * + * Return: 0 if properly configured, -EINVAL otherwise + * + * This function validates that both CNA and EID are properly set + * before attempting to send remote events. 
+ */ +static int check_if_eid_cna_is_set(void) +{ + size_t eid_len = strlen(sentry_client_ctx.eid_raw_str); + + if (g_local_cna > CNA_MAX_VALUE || eid_len == 0) { + pr_err("cna or eid not set, ignore current event\n"); + return -EINVAL; + } + return 0; +} + +/** + * check_if_urma_or_uvb_is_ready - Check if URMA or UVB communication is ready + * + * Return: 0 if at least one communication method is ready, -ENODEV otherwise + * + * This function checks the availability of URMA and UVB communication + * channels and updates the usage flags accordingly. + */ +static int check_if_urma_or_uvb_is_ready(void) +{ + if (sentry_client_ctx.use_urma && !g_is_created_ubcore_resource) { + pr_info("URMA not ready, disable URMA communication\n"); + sentry_client_ctx.use_urma = false; + } + + if (sentry_client_ctx.use_uvb && !(g_server_cna_valid_num > 0)) { + pr_warn("UVB not ready, disable UVB communication\n"); + sentry_client_ctx.use_uvb = false; + } + + if (!(sentry_client_ctx.use_urma || sentry_client_ctx.use_uvb)) { + pr_err("both urma and uvb not connected, ignore current event\n"); + return -ENODEV; + } + + return 0; +} + +/** + * panic_handler - Panic notifier handler + * @nb: Notifier block + * @code: Panic code + * @unused: Unused parameter + * + * Return: NOTIFY_OK + * + * This function handles system panic events by sending panic notifications + * to remote nodes and waiting for acknowledgments. + */ +int panic_handler(struct notifier_block *nb, unsigned long code, void *unused) +{ + if (!sentry_client_ctx.panic_enable) + return NOTIFY_OK; + + sentry_client_ctx.is_in_panic_status = true; + pr_info("Panic handler: received panic message\n"); + + if (check_if_eid_cna_is_set() || check_if_urma_or_uvb_is_ready()) + return NOTIFY_OK; + + pr_info("panic_timeout_ms %lu, cna [%u], eid [%s]\n", + sentry_client_ctx.panic_timeout_ms, g_local_cna, + sentry_client_ctx.eid_raw_str); + + set_urma_panic_mode(true); + remote_event_handler(SMH_MESSAGE_PANIC, sentry_client_ctx.panic_timeout_ms); + pr_info("Panic handler: Blocking finished\n"); + + return NOTIFY_OK; +} + +/** + * kernel_reboot_handler - Kernel reboot notifier handler + * @nb: Notifier block + * @code: Reboot code + * @unused: Unused parameter + * + * Return: NOTIFY_OK + * + * This function handles kernel reboot events by sending reboot notifications + * to remote nodes and waiting for acknowledgments. 
+ */ +int kernel_reboot_handler(struct notifier_block *nb, unsigned long code, void *unused) +{ + if (!sentry_client_ctx.kernel_reboot_enable) + return NOTIFY_OK; + + pr_info("kernel reboot handler: received kernel reboot message\n"); + + if (check_if_eid_cna_is_set() || check_if_urma_or_uvb_is_ready()) + return NOTIFY_OK; + + pr_info("kernel_reboot_timeout_ms %lu, cna [%u], eid [%s]\n", + sentry_client_ctx.kernel_reboot_timeout_ms, g_local_cna, + sentry_client_ctx.eid_raw_str); + + set_urma_panic_mode(false); + remote_event_handler(SMH_MESSAGE_KERNEL_REBOOT, + sentry_client_ctx.kernel_reboot_timeout_ms); + pr_info("Kernel reboot handler: Blocking finished\n"); + + /* Stop URMA thread proactively */ + sentry_panic_reporter_exit(); + return NOTIFY_OK; +} + +/** + * proc_panic_reporter_enable_file_show - Show panic reporter enable status + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_panic_reporter_enable_file_show(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + const char *status = sentry_client_ctx.panic_enable ? "on" : "off"; + size_t len = sentry_client_ctx.panic_enable ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +/** + * proc_kernel_reboot_reporter_enable_file_show - Show kernel reboot reporter enable status + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_kernel_reboot_reporter_enable_file_show(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + const char *status = sentry_client_ctx.kernel_reboot_enable ? "on" : "off"; + size_t len = sentry_client_ctx.kernel_reboot_enable ? 
2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +/** + * proc_reporter_cna_show - Show local CNA value + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_reporter_cna_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char cna_str[INTEGER_TO_STR_MAX_LEN]; + + snprintf(cna_str, sizeof(cna_str), "%u\n", g_local_cna); + return simple_read_from_buffer(buf, count, ppos, cna_str, strlen(cna_str)); +} + +/** + * proc_reporter_eid_show - Show local EID value + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_reporter_eid_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, + sentry_client_ctx.eid_raw_str, + strlen(sentry_client_ctx.eid_raw_str)); +} + +/** + * proc_panic_enable_file_write - Write handler for panic enable control + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_panic_enable_file_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for panic mode, only 'off' or 'on' allowed\n"); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set panic mode failed\n"); + return -EFAULT; + } + + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + if (!crash_kexec_post_notifiers) { + pr_warn("crash_kexec_post_notifiers disabled, cannot enable panic event\n"); + return -EPERM; + } + sentry_client_ctx.panic_enable = true; + } else if (strcmp(enable_str, "off") == 0) { + sentry_client_ctx.panic_enable = false; + } else { + pr_err("invalid value for panic mode\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_kernel_reboot_enable_file_write - Write handler for kernel reboot enable control + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_kernel_reboot_enable_file_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for kernel_reboot mode, only 'off' or 'on' allowed\n"); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set kernel_reboot mode failed\n"); + return -EFAULT; + } + + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + sentry_client_ctx.kernel_reboot_enable = true; + } else if (strcmp(enable_str, "off") == 0) { + sentry_client_ctx.kernel_reboot_enable = false; + } else { + pr_err("invalid value for kernel_reboot mode\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_uvb_comm_file_show - Show UVB communication enable status + * @file: proc 
file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_uvb_comm_file_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + const char *status = sentry_client_ctx.use_uvb ? "on" : "off"; + size_t len = sentry_client_ctx.use_uvb ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +/** + * proc_urma_comm_file_show - Show URMA communication enable status + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_urma_comm_file_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + const char *status = sentry_client_ctx.use_urma ? "on" : "off"; + size_t len = sentry_client_ctx.use_urma ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +/** + * proc_uvb_comm_file_write - Write handler for UVB communication control + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_uvb_comm_file_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for uvb_comm, only 'off' or 'on' allowed\n"); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set uvb_comm failed\n"); + return -EFAULT; + } + + /* Remove trailing newline if present */ + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + sentry_client_ctx.use_uvb = true; + } else if (strcmp(enable_str, "off") == 0) { + if (!sentry_client_ctx.use_urma) { + pr_err("Cannot disable both URMA and UVB comm modes\n"); + return -EINVAL; + } + sentry_client_ctx.use_uvb = false; + } else { + pr_err("invalid value for uvb_comm\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_urma_comm_file_write - Write handler for URMA communication control + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_urma_comm_file_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for urma_comm, only 'off' or 'on' allowed\n"); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set urma_comm failed\n"); + return -EFAULT; + } + + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + sentry_client_ctx.use_urma = true; + } else if (strcmp(enable_str, "off") == 0) { + if (!sentry_client_ctx.use_uvb) { + pr_err("Cannot disable both URMA and UVB comm modes\n"); + return -EINVAL; + } + sentry_client_ctx.use_urma = false; + } else { + pr_err("invalid value for urma_comm\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_panic_timeout_show - Show panic timeout value + * @file: proc file pointer + * @buf: user space 
buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_panic_timeout_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char timeout_str[INTEGER_TO_STR_MAX_LEN]; + + snprintf(timeout_str, sizeof(timeout_str), "%ld\n", + sentry_client_ctx.panic_timeout_ms); + return simple_read_from_buffer(buf, count, ppos, timeout_str, strlen(timeout_str)); +} + +/** + * proc_kernel_reboot_timeout_show - Show kernel reboot timeout value + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_kernel_reboot_timeout_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char timeout_str[INTEGER_TO_STR_MAX_LEN]; + + snprintf(timeout_str, sizeof(timeout_str), "%ld\n", + sentry_client_ctx.kernel_reboot_timeout_ms); + return simple_read_from_buffer(buf, count, ppos, timeout_str, strlen(timeout_str)); +} + +/** + * proc_reporter_cna_write - Write handler for CNA configuration + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_reporter_cna_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + uint32_t val; + + ret = kstrtou32_from_user(ubuf, cnt, 10, &val); + if (ret) { + pr_err("parse input parameter for cna failed\n"); + return ret; + } + + if (val > CNA_MAX_VALUE) { + pr_err("set cna failed, max value is %u\n", CNA_MAX_VALUE); + return -EINVAL; + } + + if (sentry_client_ctx.is_uvb_cis_func_registered) { + /* Repeated registration will fail, unregister first */ + unregister_local_cis_func(UBIOS_CALL_ID_PANIC_CALL, UBIOS_USER_ID_UB_DEVICE); + } + + ret = register_local_cis_func(UBIOS_CALL_ID_PANIC_CALL, UBIOS_USER_ID_UB_DEVICE, + cis_ubios_remote_msg_cb); + if (ret) { + pr_err("uvb register function failed\n"); + return ret; + } + + sentry_client_ctx.is_uvb_cis_func_registered = true; + g_local_cna = val; + return cnt; +} + +/** + * proc_reporter_eid_write - Write handler for EID configuration + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_reporter_eid_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + int eid_num = 0; + char eid_str_buf[LOCAL_EID_MAX_LEN]; + char eid_str_buf_tmp[LOCAL_EID_MAX_LEN]; + char eid_str_array[MAX_DIE_NUM][EID_MAX_LEN] = {0}; + union ubcore_eid eid_ub_buf[MAX_DIE_NUM] = {0}; + + if (cnt > LOCAL_EID_MAX_LEN) { + pr_err("invalid eid info, max len %d, actual %lu\n", + LOCAL_EID_MAX_LEN - 1, cnt); + return -EINVAL; + } + + ret = copy_from_user(eid_str_buf, ubuf, cnt); + if (ret) { + pr_err("set eid failed\n"); + return -EFAULT; + } + + if (cnt > 0 && eid_str_buf[cnt - 1] == '\n') + eid_str_buf[cnt - 1] = '\0'; + + if (cnt == LOCAL_EID_MAX_LEN && eid_str_buf[cnt - 1] != '\0') { + pr_err("invalid eid info, max len %d, actual %lu\n", + LOCAL_EID_MAX_LEN - 1, cnt); + return -EINVAL; + } + + memcpy(eid_str_buf_tmp, eid_str_buf, LOCAL_EID_MAX_LEN); + ret = process_multi_eid_string(eid_str_buf_tmp, eid_str_array, eid_ub_buf, + ";", 
MAX_DIE_NUM); + if (ret < 0) + return ret; + + eid_num = ret; + ret = sentry_create_urma_resource(eid_ub_buf, eid_num); + if (ret) + return ret; + + /* Valid EID, update global EID */ + for (int i = 0; i < eid_num; i++) { + memcpy(&sentry_client_ctx.eid[i], &eid_ub_buf[i], + sizeof(union ubcore_eid)); + snprintf(sentry_client_ctx.eid_str[i], EID_MAX_LEN, "%s", + eid_str_array[i]); + } + + sentry_client_ctx.die_num_configured = eid_num; + memcpy(sentry_client_ctx.eid_raw_str, eid_str_buf, LOCAL_EID_MAX_LEN); + return cnt; +} + +/** + * proc_panic_timeout_write - Write handler for panic timeout configuration + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_panic_timeout_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + unsigned long val; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) { + pr_err("invalid value for panic_timeout\n"); + return ret; + } + + if (val < PANIC_TIMEOUT_MS_MIN || val > PANIC_TIMEOUT_MS_MAX) { + pr_err("panic_timeout range [%d, %d], current %lu\n", + PANIC_TIMEOUT_MS_MIN, PANIC_TIMEOUT_MS_MAX, val); + return -EINVAL; + } + + sentry_client_ctx.panic_timeout_ms = val; + return cnt; +} + +/** + * proc_kernel_reboot_timeout_write - Write handler for kernel reboot timeout configuration + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_kernel_reboot_timeout_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + unsigned long val; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) { + pr_err("parse input parameter for kernel_reboot_timeout failed\n"); + return ret; + } + + if (val < KERNEL_REBOOT_TIMEOUT_MS_MIN || val > KERNEL_REBOOT_TIMEOUT_MS_MAX) { + pr_err("kernel_reboot_timeout range [%d, %d], current %lu\n", + KERNEL_REBOOT_TIMEOUT_MS_MIN, KERNEL_REBOOT_TIMEOUT_MS_MAX, val); + return -EINVAL; + } + + sentry_client_ctx.kernel_reboot_timeout_ms = val; + return cnt; +} + +/* Proc file operations structures */ +static const struct proc_ops proc_reporter_cna_file_operations = { + .proc_read = proc_reporter_cna_show, + .proc_write = proc_reporter_cna_write, +}; + +static const struct proc_ops proc_reporter_eid_file_operations = { + .proc_read = proc_reporter_eid_show, + .proc_write = proc_reporter_eid_write, +}; + +static const struct proc_ops proc_panic_enable_file_operations = { + .proc_read = proc_panic_reporter_enable_file_show, + .proc_write = proc_panic_enable_file_write, +}; + +static const struct proc_ops proc_kernel_reboot_enable_file_operations = { + .proc_read = proc_kernel_reboot_reporter_enable_file_show, + .proc_write = proc_kernel_reboot_enable_file_write, +}; + +static const struct proc_ops proc_uvb_comm_file_operations = { + .proc_read = proc_uvb_comm_file_show, + .proc_write = proc_uvb_comm_file_write, +}; + +static const struct proc_ops proc_urma_comm_file_operations = { + .proc_read = proc_urma_comm_file_show, + .proc_write = proc_urma_comm_file_write, +}; + +static const struct proc_ops proc_panic_timeout_file_operations = { + .proc_read = proc_panic_timeout_show, + .proc_write = proc_panic_timeout_write, +}; + +static const struct proc_ops proc_kernel_reboot_timeout_file_operations = { + 
.proc_read = proc_kernel_reboot_timeout_show, + .proc_write = proc_kernel_reboot_timeout_write, +}; + +/** + * init_sentry_remote_reporter_proc - Initialize proc filesystem entries + * + * Return: 0 on success, negative error code on failure + * + * This function creates all proc filesystem entries for the remote reporter + * module, allowing user-space configuration of various parameters. + */ +static int init_sentry_remote_reporter_proc(void) +{ + int ret = 0; + + sentry_client_ctx.panic_proc_dir = proc_mkdir_mode("sentry_remote_reporter", + PROC_DIR_PERMISSION, NULL); + if (!sentry_client_ctx.panic_proc_dir) { + pr_err("create /proc/sentry_remote_reporter dir failed\n"); + return -ENOMEM; + } + + ret |= sentry_create_proc_file("cna", sentry_client_ctx.panic_proc_dir, + &proc_reporter_cna_file_operations); + ret |= sentry_create_proc_file("eid", sentry_client_ctx.panic_proc_dir, + &proc_reporter_eid_file_operations); + ret |= sentry_create_proc_file("panic_timeout", sentry_client_ctx.panic_proc_dir, + &proc_panic_timeout_file_operations); + ret |= sentry_create_proc_file("kernel_reboot_timeout", + sentry_client_ctx.panic_proc_dir, + &proc_kernel_reboot_timeout_file_operations); + ret |= sentry_create_proc_file("panic", sentry_client_ctx.panic_proc_dir, + &proc_panic_enable_file_operations); + ret |= sentry_create_proc_file("kernel_reboot", sentry_client_ctx.panic_proc_dir, + &proc_kernel_reboot_enable_file_operations); + ret |= sentry_create_proc_file("uvb_comm", sentry_client_ctx.panic_proc_dir, + &proc_uvb_comm_file_operations); + ret |= sentry_create_proc_file("urma_comm", sentry_client_ctx.panic_proc_dir, + &proc_urma_comm_file_operations); + if (ret < 0) + proc_remove(sentry_client_ctx.panic_proc_dir); + + return ret; +} + +/* Notifier blocks for system events */ +static struct notifier_block panic_notifier = { + .notifier_call = panic_handler, + .priority = INT_MAX, +}; + +static struct notifier_block kernel_reboot_notifier = { + .notifier_call = kernel_reboot_handler, + .priority = INT_MAX, +}; + +/** + * sentry_remote_reporter_init - Module initialization function + * + * Return: 0 on success, negative error code on failure + * + * This function initializes the remote reporter module, including: + * - Generating random ID + * - Initializing panic reporter + * - Allocating message buffers + * - Registering system notifiers + * - Creating proc filesystem entries + */ +static int __init sentry_remote_reporter_init(void) +{ + int ret; + int i; + + sentry_client_ctx.random_id = get_random_u32(); + + ret = sentry_panic_reporter_init(); + if (ret) + return ret; + + sentry_client_ctx.msg_str = kzalloc(MAX_NODE_NUM * 2 * sizeof(char *), GFP_KERNEL); + if (!sentry_client_ctx.msg_str) { + pr_err("Failed to allocate memory for msg_str\n"); + ret = -ENOMEM; + goto stop_kthread; + } + + for (i = 0; i < MAX_NODE_NUM * 2; i++) { + sentry_client_ctx.msg_str[i] = kzalloc(URMA_SEND_DATA_MAX_LEN, GFP_KERNEL); + if (!sentry_client_ctx.msg_str[i]) { + pr_err("Failed to allocate memory for msg_str[%d]\n", i); + free_char_array(sentry_client_ctx.msg_str, i); + ret = -ENOMEM; + goto stop_kthread; + } + } + + ret = register_reboot_notifier(&kernel_reboot_notifier); + if (ret) { + pr_err("Failed to register kernel reboot handler: %d\n", ret); + goto free_msg_str; + } + pr_info("Kernel reboot handler registered\n"); + + ret = atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier); + if (ret) { + pr_err("Failed to register panic handler: %d\n", ret); + goto unregister_kernel_reboot; + } + + 
ret = init_sentry_remote_reporter_proc(); + if (ret) { + pr_err("Failed to create sentry_remote_reporter proc: %d\n", ret); + goto unregister_panic; + } + + pr_info("Panic handler registered\n"); + return 0; + +unregister_panic: + atomic_notifier_chain_unregister(&panic_notifier_list, &panic_notifier); +unregister_kernel_reboot: + unregister_reboot_notifier(&kernel_reboot_notifier); +free_msg_str: + free_char_array(sentry_client_ctx.msg_str, MAX_NODE_NUM * 2); +stop_kthread: + sentry_panic_reporter_exit(); + return ret; +} + +/** + * sentry_remote_reporter_exit - Module cleanup function + * + * This function cleans up all resources allocated by the remote reporter module, + * including unregistering notifiers, freeing memory, and removing proc entries. + */ +static void __exit sentry_remote_reporter_exit(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, &panic_notifier); + pr_info("Panic handler unregistered\n"); + + unregister_reboot_notifier(&kernel_reboot_notifier); + pr_info("Kernel reboot handler unregistered\n"); + + free_char_array(sentry_client_ctx.msg_str, MAX_NODE_NUM * 2); + + if (sentry_client_ctx.panic_proc_dir) + proc_remove(sentry_client_ctx.panic_proc_dir); + + sentry_panic_reporter_exit(); + + if (sentry_client_ctx.is_uvb_cis_func_registered) { + unregister_local_cis_func(UBIOS_CALL_ID_PANIC_CALL, UBIOS_USER_ID_UB_DEVICE); + pr_info("UVB CIS function unregistered\n"); + } +} + +module_init(sentry_remote_reporter_init); +module_exit(sentry_remote_reporter_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("sxt1001"); +MODULE_DESCRIPTION("sentry_remote_reporter module"); +MODULE_VERSION("1.0"); diff --git a/drivers/ub/sentry/sentry_remote_reporter.h b/drivers/ub/sentry/sentry_remote_reporter.h new file mode 100644 index 0000000000000000000000000000000000000000..5120e9512567ee85cff2b17df07910c086a24c2b --- /dev/null +++ b/drivers/ub/sentry/sentry_remote_reporter.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ * Description: Header File for sentry module + * Author: sxt1001 + * Create: 2025-03-18 + */ + +#ifndef SENTRY_REMOTE_REPORTER_H +#define SENTRY_REMOTE_REPORTER_H + +#include +#include +#include +#include +#include + +#include "smh_common_type.h" + +extern void set_urma_panic_mode(bool is_panic); + +// sentry uvb global variable +extern uint32_t g_server_cna_array[MAX_NODE_NUM]; +extern int g_server_cna_valid_num; +extern int cis_ubios_remote_msg_cb(struct cis_message *cis_msg); + +// sentry urma global variable and functions +extern bool g_is_created_ubcore_resource; +extern int str_to_eid(const char *buf, union ubcore_eid *eid); +extern int match_index_by_remote_ub_eid(union ubcore_eid remote_id, int *node_index, int *die_index); +extern int sentry_create_urma_resource(union ubcore_eid eid[], int eid_num); +extern int process_multi_eid_string(char *eid_buf, char eid_array[][EID_MAX_LEN], + union ubcore_eid eid_tmp[], const char *sepstr, int eid_max_num); + +enum SENTRY_REMOTE_COMM_TYPE { + COMM_TYPE_URMA, + COMM_TYPE_UVB, + COMM_TYPE_UNKNOWN +}; + +struct child_thread_process_data { + struct sentry_msg_helper_msg *msg; + enum SENTRY_REMOTE_COMM_TYPE comm_type; + uint32_t random_id; +}; + +struct node_msg_info { + uint32_t random_id; + uint64_t start_send_time; + uint64_t msgid; +}; + +struct sentry_remote_context { + struct node_msg_info node_msg_info_list[MAX_NODE_NUM]; + struct sentry_msg_helper_msg remote_event_ack_msg_buf; + atomic_t remote_event_ack_received; + atomic_t remote_event_ack_done; + struct task_struct *urma_receiver_thread; +}; + +extern spinlock_t sentry_buf_lock; +extern struct sentry_remote_context sentry_remote_ctx; + +int sentry_panic_reporter_init(void); +void sentry_panic_reporter_exit(void); + +int send_msg_to_userspace_and_ack(struct sentry_msg_helper_msg *msg, enum SENTRY_REMOTE_COMM_TYPE comm_type, + uint32_t random_id, enum sentry_msg_helper_msg_type ack_type); + +void write_ack_msg_buf(const struct sentry_msg_helper_msg *msg, enum SENTRY_REMOTE_COMM_TYPE comm_type); +int create_kthread_to_process_msg(const char *event_msg, enum SENTRY_REMOTE_COMM_TYPE comm_type); +enum sentry_msg_helper_msg_type get_ack_type(enum sentry_msg_helper_msg_type event_type); +#endif diff --git a/drivers/ub/sentry/sentry_remote_server.c b/drivers/ub/sentry/sentry_remote_server.c new file mode 100644 index 0000000000000000000000000000000000000000..42b24a3a0bd916008723688163d51674b0e1a707 --- /dev/null +++ b/drivers/ub/sentry/sentry_remote_server.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ * + * Description: Server module, used for reporting panic or reboot msg to the + * userspace and forward ack msg to the client + * Author: sxt1001 + * Create: 2025-03-18 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smh_message.h" +#include "sentry_remote_reporter.h" + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][remote server]: " fmt + +struct sentry_remote_context sentry_remote_ctx; +DEFINE_SPINLOCK(sentry_buf_lock); + +static DEFINE_MUTEX(sentry_msg_info_mutex); + +/** + * send_msg_to_userspace - Send message to userspace with proper tracking + * @msg: Message to send + * @comm_type: Communication type (URMA or UVB) + * @random_id: Random identifier for message tracking + * + * Return: 0 on success, negative error code on failure + * + * This function sends a message to userspace and tracks it using node message + * info for acknowledgment handling. + */ +int send_msg_to_userspace(struct sentry_msg_helper_msg *msg, + enum SENTRY_REMOTE_COMM_TYPE comm_type, uint32_t random_id) +{ + int ret; + int node_idx = -1; + int die_index = -1; + union ubcore_eid dst_ubcore_eid; + + pr_info("send %s message to userspace\n", + comm_type == COMM_TYPE_URMA ? "urma" : "uvb"); + + if (comm_type == COMM_TYPE_URMA) { + if (str_to_eid(msg->helper_msg_info.remote_info.eid, &dst_ubcore_eid) < 0) { + pr_err("send_msg_to_userspace: invalid dst eid [%s]\n", + msg->helper_msg_info.remote_info.eid); + return -EINVAL; + } + match_index_by_remote_ub_eid(dst_ubcore_eid, &node_idx, &die_index); + } else if (comm_type == COMM_TYPE_UVB) { + int i; + + for (i = 0; i < g_server_cna_valid_num; i++) { + if (msg->helper_msg_info.remote_info.cna == g_server_cna_array[i]) { + node_idx = i; + break; + } + } + } + + if (node_idx < 0) { + pr_err("Invalid cna: %u or eid: %s of msg, stop to send to userspace\n", + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid); + return -EINVAL; + } + + mutex_lock(&sentry_msg_info_mutex); + if (sentry_remote_ctx.node_msg_info_list[node_idx].random_id != random_id) { + pr_info("Get new message from cna: %u, eid: %s\n", + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid); + sentry_remote_ctx.node_msg_info_list[node_idx].start_send_time = ktime_get_ns(); + sentry_remote_ctx.node_msg_info_list[node_idx].msgid = smh_get_new_msg_id(); + sentry_remote_ctx.node_msg_info_list[node_idx].random_id = random_id; + } + msg->start_send_time = sentry_remote_ctx.node_msg_info_list[node_idx].start_send_time; + msg->msgid = sentry_remote_ctx.node_msg_info_list[node_idx].msgid; + mutex_unlock(&sentry_msg_info_mutex); + + ret = smh_message_send(msg, true); + return ret; +} + +/** + * send_msg_to_userspace_and_ack - Send message to userspace and wait for acknowledgment + * @msg: Message to send + * @comm_type: Communication type (URMA or UVB) + * @random_id: Random identifier for message tracking + * @ack_type: Type of acknowledgment expected + * + * Return: 0 on success, negative error code on failure + * + * This function sends a message to userspace, waits for acknowledgment, and + * sends acknowledgment back to the remote node. 
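+ *
+ * Illustrative call (a sketch only; the field values here are assumptions,
+ * not taken from this patch):
+ *
+ *   struct sentry_msg_helper_msg *msg = kzalloc(sizeof(*msg), GFP_KERNEL);
+ *
+ *   msg->type = SMH_MESSAGE_PANIC;
+ *   msg->timeout_time = 30000;
+ *   ret = send_msg_to_userspace_and_ack(msg, COMM_TYPE_UVB,
+ *                                       get_random_u32(),
+ *                                       SMH_MESSAGE_PANIC_ACK);
+ *
+ * (msg->helper_msg_info.remote_info.cna/.eid must already describe the peer.)
+ * On success the remote client receives an ack string of the form
+ * "<ack_type>_<cna>_<eid>_<res>", built by the snprintf() below.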
+ */ +int send_msg_to_userspace_and_ack(struct sentry_msg_helper_msg *msg, + enum SENTRY_REMOTE_COMM_TYPE comm_type, + uint32_t random_id, enum sentry_msg_helper_msg_type ack_type) +{ + int ret; + int times = msg->timeout_time / MILLISECONDS_OF_EACH_MDELAY; + int i, j; + + ret = send_msg_to_userspace(msg, comm_type, random_id); + if (ret) { + pr_err("Failed to send remote message to userspace\n"); + return ret; + } + + /* Wait for acknowledgment from userspace */ + for (i = 0; i < times; i++) { + uint64_t cur_time = ktime_get_ns(); + + ret = smh_message_get_ack(msg); + if (!ret) { + int sleep_time = MILLISECONDS_OF_EACH_MDELAY - + (int)((ktime_get_ns() - cur_time) / NSEC_PER_MSEC); + if (sleep_time > 0) + msleep_interruptible(sleep_time); + continue; + } + + /* Get acknowledgment success, send acknowledgment message */ + char send_ack[URMA_SEND_DATA_MAX_LEN]; + + ret = snprintf(send_ack, URMA_SEND_DATA_MAX_LEN, "%d_%u_%s_%lu", + ack_type, + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid, + msg->res); + if ((size_t)ret >= URMA_SEND_DATA_MAX_LEN) { + pr_err("msg str size exceeds the max value\n"); + return -EINVAL; + } + + pr_info("Start to send %s ack msg to %u\n", + comm_type == COMM_TYPE_URMA ? "urma" : "uvb", + msg->helper_msg_info.remote_info.cna); + + if (comm_type == COMM_TYPE_URMA) { + /* Retry URMA acknowledgment sending */ + for (j = 0; j < URMA_ACK_RETRY_NUM; j++) { + ret = urma_send(send_ack, sizeof(send_ack), + msg->helper_msg_info.remote_info.eid, -1); + if (ret == COMM_PARM_NOT_SET) + break; + msleep_interruptible(MILLISECONDS_OF_EACH_MDELAY); + } + } else { + /* UVB is a reliable protocol, no need to resend */ + ret = uvb_send(send_ack, msg->helper_msg_info.remote_info.cna, false); + } + + if (ret <= 0) { + pr_warn("Failed to send %s ack message to client (cna:%u, eid:%s)\n", + comm_type == COMM_TYPE_URMA ? 
"urma" : "uvb", + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid); + return -EFAULT; + } + return 0; + } + + return -ETIMEDOUT; +} + +/** + * get_ack_type - Get acknowledgment type for given event type + * @event_type: Event type to get acknowledgment for + * + * Return: Corresponding acknowledgment type + */ +enum sentry_msg_helper_msg_type get_ack_type(enum sentry_msg_helper_msg_type event_type) +{ + enum sentry_msg_helper_msg_type ack_type; + + switch (event_type) { + case SMH_MESSAGE_PANIC: + ack_type = SMH_MESSAGE_PANIC_ACK; + break; + case SMH_MESSAGE_KERNEL_REBOOT: + ack_type = SMH_MESSAGE_KERNEL_REBOOT_ACK; + break; + default: + pr_warn("Invalid event type!\n"); + ack_type = SMH_MESSAGE_UNKNOWN; + } + + return ack_type; +} + +/** + * process_remote_event_msg - Process remote event message in kernel thread context + * @data: Pointer to child_thread_process_data structure containing message info + * + * Return: 0 on success, negative error code on failure + */ +static int process_remote_event_msg(void *data) +{ + int ret; + enum sentry_msg_helper_msg_type ack_type; + struct child_thread_process_data *child_data = data; + + try_module_get(THIS_MODULE); + + ack_type = get_ack_type(child_data->msg->type); + if (ack_type == SMH_MESSAGE_UNKNOWN) { + ret = -EINVAL; + goto cleanup_child; + } + + ret = send_msg_to_userspace_and_ack(child_data->msg, child_data->comm_type, + child_data->random_id, ack_type); + +cleanup_child: + kfree(child_data->msg); + kfree(child_data); + module_put(THIS_MODULE); + return ret; +} + +/** + * write_ack_msg_buf - Write acknowledgment message to shared buffer + * @msg: Acknowledgment message + * @comm_type: Communication type (URMA or UVB) + * + * This function writes an acknowledgment message to a shared buffer for + * inter-process communication, ensuring thread-safe access. + */ +void write_ack_msg_buf(const struct sentry_msg_helper_msg *msg, + enum SENTRY_REMOTE_COMM_TYPE comm_type) +{ + if (atomic_inc_return(&sentry_remote_ctx.remote_event_ack_received) == 1) { + pr_info("Receive ack message from %s: [%d_%u_%s_%lu]. Start to update buf\n", + comm_type == COMM_TYPE_URMA ? "URMA" : "UVB", + msg->type, + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid, + msg->res); + + spin_lock(&sentry_buf_lock); + memcpy(&sentry_remote_ctx.remote_event_ack_msg_buf, msg, + sizeof(sentry_remote_ctx.remote_event_ack_msg_buf)); + spin_unlock(&sentry_buf_lock); + atomic_set(&sentry_remote_ctx.remote_event_ack_done, 1); + } +} + +/** + * create_kthread_to_process_msg - Create kernel thread to process incoming message + * @event_msg: Raw event message string + * @comm_type: Communication type (URMA or UVB) + * + * Return: 0 on success, negative error code on failure + * + * This function creates a kernel thread to process incoming remote messages, + * handling both panic/reboot events and acknowledgment messages. + */ +int create_kthread_to_process_msg(const char *event_msg, + enum SENTRY_REMOTE_COMM_TYPE comm_type) +{ + int ret; + struct sentry_msg_helper_msg msg; + uint32_t random_id; + struct child_thread_process_data *child_data; + struct task_struct *child_thread; + + ret = convert_str_to_smh_msg(event_msg, &msg, &random_id); + if (ret) { + pr_err("convert %s data to smh msg failed. msg [%s]\n", + comm_type == COMM_TYPE_URMA ? 
"urma" : "uvb", event_msg); + return -EINVAL; + } + + if (msg.type != SMH_MESSAGE_PANIC && msg.type != SMH_MESSAGE_KERNEL_REBOOT) { + /* Write acknowledgment message to shared memory */ + write_ack_msg_buf(&msg, comm_type); + return 0; + } + + child_data = kzalloc(sizeof(*child_data), GFP_KERNEL); + if (!child_data) { + pr_err("Failed to allocate memory for child_data\n"); + return -ENOMEM; + } + + child_data->msg = kzalloc(sizeof(*child_data->msg), GFP_KERNEL); + if (!child_data->msg) { + kfree(child_data); + pr_err("Failed to allocate memory for child_data->msg\n"); + return -ENOMEM; + } + + /* Update child thread data */ + memcpy(child_data->msg, &msg, sizeof(*child_data->msg)); + child_data->random_id = random_id; + child_data->comm_type = comm_type; + + child_thread = kthread_run(process_remote_event_msg, child_data, + "sentry_msg_thread_%s_%u", + comm_type == COMM_TYPE_URMA ? "urma" : "uvb", + random_id); + if (IS_ERR(child_thread)) { + kfree(child_data->msg); + kfree(child_data); + pr_err("Failed to create child thread\n"); + return PTR_ERR(child_thread); + } + + return 0; +} + +/** + * process_urma_data - Process URMA data in kernel thread + * @data: Thread data (unused) + * + * Return: 0 on success, negative error code on failure + * + * This function runs in a kernel thread to receive and process URMA messages, + * creating separate threads for message processing. + */ +static int process_urma_data(void *data) +{ + int ret = 0; + int recv_msg_nodes = 0; + char **msg_str; + int i; + + msg_str = kcalloc(MAX_NODE_NUM * MAX_DIE_NUM, sizeof(*msg_str), GFP_KERNEL); + if (!msg_str) { + pr_err("Failed to allocate memory for msg_str!\n"); + return -ENOMEM; + } + + for (i = 0; i < MAX_NODE_NUM * MAX_DIE_NUM; i++) { + msg_str[i] = kzalloc(URMA_SEND_DATA_MAX_LEN, GFP_KERNEL); + if (!msg_str[i]) { + pr_err("Failed to allocate memory for msg_str[%d]!\n", i); + ret = -ENOMEM; + goto free_msg; + } + } + + while (!kthread_should_stop()) { + /* Listen for URMA messages */ + recv_msg_nodes = urma_recv(msg_str, URMA_SEND_DATA_MAX_LEN); + if (recv_msg_nodes <= 0) { + /* + * Prevent processes from entering the D state if reboot event + * occurs on the current node + */ + msleep_interruptible(MILLISECONDS_OF_EACH_MDELAY); + continue; + } + + pr_info("urma messages are received, the number of nodes that are successfully received is %d\n", + recv_msg_nodes); + + for (i = 0; i < recv_msg_nodes; i++) { + if (strcmp(HEARTBEAT, msg_str[i]) == 0 || + strcmp(HEARTBEAT_ACK, msg_str[i]) == 0) + continue; + + ret = create_kthread_to_process_msg(msg_str[i], COMM_TYPE_URMA); + if (ret == -ENOMEM) + goto free_msg; + } + + /* + * Prevent processes from entering the D state if reboot event + * occurs on the current node + */ + msleep_interruptible(MILLISECONDS_OF_EACH_MDELAY); + } + +free_msg: + free_char_array(msg_str, MAX_NODE_NUM); + + pr_info("Urma receiver thread stopped!\n"); + return ret; +} + +/** + * cis_ubios_remote_msg_cb - UVB remote message callback + * @cis_msg: CIS message from UVB + * + * Return: 0 on success, negative error code on failure + * + * This function serves as the callback for UVB remote messages, + * processing incoming messages through the appropriate mechanism. 
+ */ +int cis_ubios_remote_msg_cb(struct cis_message *cis_msg) +{ + int ret; + + pr_info("uvb get msg: [%s]\n", (char *)cis_msg->input); + ret = create_kthread_to_process_msg((char *)cis_msg->input, COMM_TYPE_UVB); + return ret; +} + +/** + * sentry_panic_reporter_init - Initialize sentry panic reporter module + * + * Return: 0 on success, negative error code on failure + */ +int sentry_panic_reporter_init(void) +{ + atomic_set(&sentry_remote_ctx.remote_event_ack_received, 0); + atomic_set(&sentry_remote_ctx.remote_event_ack_done, 0); + + sentry_remote_ctx.urma_receiver_thread = kthread_run(process_urma_data, NULL, "sentry_urma_kthread"); + if (IS_ERR(sentry_remote_ctx.urma_receiver_thread)) { + pr_err("Failed to create kernel urma receiver thread.\n"); + return PTR_ERR(sentry_remote_ctx.urma_receiver_thread); + } + + pr_info("Create kernel urma receiver thread success.\n"); + return 0; +} + +/** + * sentry_panic_reporter_exit - Cleanup sentry panic reporter module + */ +void sentry_panic_reporter_exit(void) +{ + if (sentry_remote_ctx.urma_receiver_thread) { + kthread_stop(sentry_remote_ctx.urma_receiver_thread); + sentry_remote_ctx.urma_receiver_thread = NULL; + pr_info("Kernel urma receiver thread stopped\n"); + } +} diff --git a/drivers/ub/sentry/sentry_reporter.c b/drivers/ub/sentry/sentry_reporter.c new file mode 100644 index 0000000000000000000000000000000000000000..76ebb3d86b6ffca5f1e1ea4856e9b65126a966bc --- /dev/null +++ b/drivers/ub/sentry/sentry_reporter.c @@ -0,0 +1,597 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * + * Description: report oom and reboot event to userspace + * Author: Luckky + * Create: 2025-02-17 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smh_message.h" + +#define REBOOT_RESULT_SUCCESS 0 +#define MAX_TIMEOUT 3600000 +#define FD_MODE 0 +#define NUMA_MODE 1 + +static DEFINE_RATELIMIT_STATE(oom_log_rs, HZ, 5); + +static unsigned int reboot_timeout_ms = 30000; +static unsigned int oom_timeout_ms = 30000; +module_param(reboot_timeout_ms, uint, 0444); +module_param(oom_timeout_ms, uint, 0444); + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][reporter]: " fmt + +static struct proc_dir_entry *g_sentry_reporter_proc_dir; + +static int g_ub_mem_fault_with_kill = 1; +static bool g_oom_enable; +static bool g_power_off_enable; +static bool g_ub_mem_fault_enable; + +/** + * check_if_timeout_param_valid - Validate timeout parameters + * + * Return: 0 if valid, negative error code otherwise + */ +static int check_if_timeout_param_valid(void) +{ + if (reboot_timeout_ms > MAX_TIMEOUT) { + pr_err("reboot timeout is out of range! (valid range: [0, %u], current value: %u)\n", + MAX_TIMEOUT, reboot_timeout_ms); + return -EINVAL; + } + + if (oom_timeout_ms > MAX_TIMEOUT) { + pr_err("oom timeout is out of range! 
(valid range: [0, %u], current value: %u)\n", + MAX_TIMEOUT, oom_timeout_ms); + return -EINVAL; + } + + return 0; +} + +/** + * smh_message_retry_send - Send message with retry mechanism + * @msg: Message to send + * @ack: Whether acknowledgment is required + * + * Return: 0 on success, negative error code on failure + */ +static int smh_message_retry_send(struct sentry_msg_helper_msg *msg, bool ack) +{ + int ret; + int i; + int times = msg->timeout_time / MILLISECONDS_OF_EACH_MDELAY; + + msg->start_send_time = ktime_get_ns(); + msg->msgid = smh_get_new_msg_id(); + + for (i = 0; i < times; i++) { + uint64_t cur_time = ktime_get_ns(); + + ret = smh_message_send(msg, ack); + if (!ack) + return ret; + + ret = smh_message_get_ack(msg); + if (ret) + return 0; + + msleep_interruptible(MILLISECONDS_OF_EACH_MDELAY - + (ktime_get_ns() - cur_time) / NSEC_PER_MSEC); + } + + if (msg->type == SMH_MESSAGE_OOM && __ratelimit(&oom_log_rs)) + pr_info("message %llu is timeout\n", msg->msgid); + + return -ETIMEDOUT; +} + +/** + * acpi_power_notifier_callback - ACPI power button notification handler + * @nb: Notifier block + * @action: Action type + * @data: Callback data + * + * Return: NOTIFY_OK on success, NOTIFY_BAD on failure + */ +static int acpi_power_notifier_callback(struct notifier_block *nb, + unsigned long action, void *data) +{ + int ret; + struct sentry_msg_helper_msg msg; + + if (!g_power_off_enable) + return NOTIFY_OK; + + msg.type = SMH_MESSAGE_POWER_OFF; + msg.timeout_time = reboot_timeout_ms + REPORT_COMM_TIME; + + pr_info("send sentry reboot message\n"); + ret = smh_message_retry_send(&msg, true); + if (ret || msg.res != REBOOT_RESULT_SUCCESS) + return NOTIFY_BAD; + + return NOTIFY_OK; +} + +static struct notifier_block acpi_power_notifier = { + .notifier_call = acpi_power_notifier_callback, + .priority = INT_MAX, +}; + +/** + * lowmem_notifier_callback - Low memory notification handler + * @nb: Notifier block + * @action: Action type + * @parm: Callback data containing reclaim information + * + * Return: NOTIFY_OK + */ +static int lowmem_notifier_callback(struct notifier_block *nb, + unsigned long action, void *parm) +{ + struct reclaim_notify_data *data = parm; + struct sentry_msg_helper_msg msg; + int ret; + int i; + + if (!g_oom_enable) + return NOTIFY_OK; + + if (data->reason > RR_HUGEPAGE_RECLAIM) + return NOTIFY_OK; + + if (__ratelimit(&oom_log_rs)) { + pr_info("got lowmem message. pid=%d sync=%d reason=%d\n", + current->pid, data->sync, data->reason); + } + + for (i = 0; i < OOM_EVENT_MAX_NUMA_NODES; i++) + msg.helper_msg_info.oom_info.nid[i] = -1; + + msg.type = SMH_MESSAGE_OOM; + msg.helper_msg_info.oom_info.nr_nid = data->nr_nid > OOM_EVENT_MAX_NUMA_NODES ? 
+ OOM_EVENT_MAX_NUMA_NODES : data->nr_nid; + for (i = 0; i < msg.helper_msg_info.oom_info.nr_nid; i++) + msg.helper_msg_info.oom_info.nid[i] = data->nid[i]; + + msg.helper_msg_info.oom_info.sync = data->sync; + msg.helper_msg_info.oom_info.timeout = oom_timeout_ms; + msg.helper_msg_info.oom_info.reason = data->reason; + msg.timeout_time = oom_timeout_ms + REPORT_COMM_TIME; + + ret = smh_message_retry_send(&msg, data->sync); + if (ret) + data->nr_freed = 0; + else + data->nr_freed = msg.res; + + return NOTIFY_OK; +} + +static struct notifier_block lowmem_notifier = { + .notifier_call = lowmem_notifier_callback, + .priority = INT_MAX, +}; + +/** + * proc_oom_enable_write - Write handler for oom proc file + * @file: File structure + * @ubuf: User buffer + * @cnt: Number of bytes to write + * @ppos: File position + * + * Return: Number of bytes written on success, negative error code on failure + */ +static ssize_t proc_oom_enable_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char oom_enable[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for oom, the value can only be 'off' or 'on'.\n"); + return -EINVAL; + } + + ret = copy_from_user(oom_enable, ubuf, cnt); + if (ret) { + pr_err("set oom failed\n"); + return -EFAULT; + } + + if (cnt > 0 && oom_enable[cnt - 1] == '\n') + oom_enable[cnt - 1] = '\0'; + + if (strcmp(oom_enable, "on") == 0) { + g_oom_enable = true; + } else if (strcmp(oom_enable, "off") == 0) { + g_oom_enable = false; + } else { + pr_err("invalid value for oom\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_oom_enable_show - Read handler for oom proc file + * @file: File structure + * @buf: User buffer + * @count: Number of bytes to read + * @ppos: File position + * + * Return: Number of bytes read on success, negative error code on failure + */ +static ssize_t proc_oom_enable_show(struct file *file, + char __user *buf, + size_t count, loff_t *ppos) +{ + const char *value = g_oom_enable ? "on" : "off"; + size_t len = g_oom_enable ? 
2 : 3; + + return simple_read_from_buffer(buf, count, ppos, value, len); +} + +static const struct proc_ops proc_oom_file_operations = { + .proc_read = proc_oom_enable_show, + .proc_write = proc_oom_enable_write, +}; + +/** + * proc_power_off_enable_write - Write handler for power_off proc file + * @file: File structure + * @ubuf: User buffer + * @cnt: Number of bytes to write + * @ppos: File position + * + * Return: Number of bytes written on success, negative error code on failure + */ +static ssize_t proc_power_off_enable_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char power_off_enable[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for power_off, the value can only be 'off' or 'on'.\n"); + return -EINVAL; + } + + ret = copy_from_user(power_off_enable, ubuf, cnt); + if (ret) { + pr_err("set power_off failed\n"); + return -EFAULT; + } + + if (cnt > 0 && power_off_enable[cnt - 1] == '\n') + power_off_enable[cnt - 1] = '\0'; + + if (strcmp(power_off_enable, "on") == 0) { + g_power_off_enable = true; + } else if (strcmp(power_off_enable, "off") == 0) { + g_power_off_enable = false; + } else { + pr_err("invalid value for power_off\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_power_off_enable_show - Read handler for power_off proc file + * @file: File structure + * @buf: User buffer + * @count: Number of bytes to read + * @ppos: File position + * + * Return: Number of bytes read on success, negative error code on failure + */ +static ssize_t proc_power_off_enable_show(struct file *file, + char __user *buf, + size_t count, loff_t *ppos) +{ + const char *value = g_power_off_enable ? "on" : "off"; + size_t len = g_power_off_enable ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, value, len); +} + +static const struct proc_ops proc_power_off_enable_file_operations = { + .proc_read = proc_power_off_enable_show, + .proc_write = proc_power_off_enable_write, +}; + +/** + * proc_ub_mem_fault_enable_write - Write handler for ub_mem_fault proc file + * @file: File structure + * @ubuf: User buffer + * @cnt: Number of bytes to write + * @ppos: File position + * + * Return: Number of bytes written on success, negative error code on failure + */ +static ssize_t proc_ub_mem_fault_enable_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char ub_mem_fault_enable[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for ub_mem_fault, the value can only be 'off' or 'on'.\n"); + return -EINVAL; + } + + ret = copy_from_user(ub_mem_fault_enable, ubuf, cnt); + if (ret) { + pr_err("set ub_mem_fault failed\n"); + return -EFAULT; + } + + if (cnt > 0 && ub_mem_fault_enable[cnt - 1] == '\n') + ub_mem_fault_enable[cnt - 1] = '\0'; + + if (strcmp(ub_mem_fault_enable, "on") == 0) { + g_ub_mem_fault_enable = true; + } else if (strcmp(ub_mem_fault_enable, "off") == 0) { + g_ub_mem_fault_enable = false; + } else { + pr_err("invalid value for ub_mem_fault\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_ub_mem_fault_enable_show - Read handler for ub_mem_fault proc file + * @file: File structure + * @buf: User buffer + * @count: Number of bytes to read + * @ppos: File position + * + * Return: Number of bytes read on success, negative error code on failure + */ +static ssize_t proc_ub_mem_fault_enable_show(struct file *file, + char __user *buf, + size_t count, loff_t *ppos) +{ + const char *value = 
g_ub_mem_fault_enable ? "on" : "off"; + size_t len = g_ub_mem_fault_enable ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, value, len); +} + +static const struct proc_ops proc_ub_mem_fault_enable_file_operations = { + .proc_read = proc_ub_mem_fault_enable_show, + .proc_write = proc_ub_mem_fault_enable_write, +}; + +/** + * proc_ub_mem_fault_with_kill_write - Write handler for ub_mem_fault_with_kill proc file + * @file: File structure + * @ubuf: User buffer + * @cnt: Number of bytes to write + * @ppos: File position + * + * Return: Number of bytes written on success, negative error code on failure + */ +static ssize_t proc_ub_mem_fault_with_kill_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char ub_mem_fault_with_kill[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for ub_mem_fault_with_kill, the value can only be 'off' or 'on'.\n"); + return -EINVAL; + } + + ret = copy_from_user(ub_mem_fault_with_kill, ubuf, cnt); + if (ret) { + pr_err("set ub_mem_fault_with_kill failed\n"); + return -EFAULT; + } + + if (cnt > 0 && ub_mem_fault_with_kill[cnt - 1] == '\n') + ub_mem_fault_with_kill[cnt - 1] = '\0'; + + if (strcmp(ub_mem_fault_with_kill, "on") == 0) { + g_ub_mem_fault_with_kill = 1; + } else if (strcmp(ub_mem_fault_with_kill, "off") == 0) { + g_ub_mem_fault_with_kill = 0; + } else { + pr_err("invalid value for ub_mem_fault_with_kill\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_ub_mem_fault_with_kill_show - Read handler for ub_mem_fault_with_kill proc file + * @file: File structure + * @buf: User buffer + * @count: Number of bytes to read + * @ppos: File position + * + * Return: Number of bytes read on success, negative error code on failure + */ +static ssize_t proc_ub_mem_fault_with_kill_show(struct file *file, + char __user *buf, + size_t count, loff_t *ppos) +{ + const char *value = g_ub_mem_fault_with_kill ? "on" : "off"; + size_t len = g_ub_mem_fault_with_kill ? 
2 : 3; + + return simple_read_from_buffer(buf, count, ppos, value, len); +} + +static const struct proc_ops proc_ub_mem_fault_with_kill_file_operations = { + .proc_read = proc_ub_mem_fault_with_kill_show, + .proc_write = proc_ub_mem_fault_with_kill_write, +}; + +/** + * ub_mem_ras_handler - UB memory RAS error handler + * @phys_addr: Physical address of the error + * @err_type: Error type + * + * Return: 0 on success + */ +static int ub_mem_ras_handler(uint64_t phys_addr, enum ras_err_type err_type) +{ + struct sentry_msg_helper_msg msg; + struct page *page; + int ret; + + if (!g_ub_mem_fault_enable) + return NOTIFY_OK; + + pr_info("ub mem error: type=%d\n", err_type); + + msg.helper_msg_info.ub_mem_info.pa = phys_addr; + msg.helper_msg_info.ub_mem_info.raw_ubus_mem_err_type = err_type; + msg.msgid = smh_get_new_msg_id(); + msg.type = SMH_MESSAGE_UB_MEM_ERR; + msg.start_send_time = ktime_get_ns(); + msg.timeout_time = ULLONG_MAX; + + if ((err_type == REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR || + err_type == UB_MEM_READ_DATA_ERR || + err_type == UB_MEM_FLOW_POISON || + err_type == UB_MEM_READ_DATA_POISON || + err_type == UB_MEM_READ_DATA_RESPERR) && g_ub_mem_fault_with_kill) { + msg.helper_msg_info.ub_mem_info.fault_with_kill = 1; + } else { + msg.helper_msg_info.ub_mem_info.fault_with_kill = 0; + } + + /* Check mode (FD or NUMA) */ + page = pfn_to_online_page(PHYS_PFN(phys_addr)); + + if (!page) { + /* FD mode */ + msg.helper_msg_info.ub_mem_info.mem_type = FD_MODE; + pr_info("ub mem error: mem mode is fd mode\n"); + } else { + /* NUMA mode */ + msg.helper_msg_info.ub_mem_info.mem_type = NUMA_MODE; + pr_info("ub mem error: mem mode is numa mode\n"); + if (msg.helper_msg_info.ub_mem_info.fault_with_kill) + memory_failure_queue(PHYS_PFN(phys_addr), 0); + } + + ret = smh_message_send(&msg, false); + if (ret) + pr_err("Failed to send remote message to userspace. %d\n", ret); + + return 0; +} + +/** + * sentry_reporter_init - Module initialization function + * + * Return: 0 on success, negative error code on failure + */ +static int __init sentry_reporter_init(void) +{ + int ret; + + ret = check_if_timeout_param_valid(); + if (ret) + return ret; + + g_sentry_reporter_proc_dir = proc_mkdir_mode("sentry_reporter", + PROC_DIR_PERMISSION, NULL); + if (!g_sentry_reporter_proc_dir) { + pr_err("create /proc/sentry_reporter dir failed\n"); + return -ENOMEM; + } + + ret = sentry_create_proc_file("ub_mem_fault_with_kill", + g_sentry_reporter_proc_dir, + &proc_ub_mem_fault_with_kill_file_operations); + ret |= sentry_create_proc_file("oom", + g_sentry_reporter_proc_dir, + &proc_oom_file_operations); + ret |= sentry_create_proc_file("power_off", + g_sentry_reporter_proc_dir, + &proc_power_off_enable_file_operations); + ret |= sentry_create_proc_file("ub_mem_fault", + g_sentry_reporter_proc_dir, + &proc_ub_mem_fault_enable_file_operations); + if (ret < 0) + goto remove_proc_dir; + + ret = register_acpi_power_notifier(&acpi_power_notifier); + pr_info("power notifier register %s\n", ret ? "failed" : "successful"); + if (ret) + goto remove_proc_dir; + + ret = register_reclaim_notifier(&lowmem_notifier); + pr_info("lowmem notifier register %s\n", ret ? 
"failed" : "successful"); + if (ret) + goto unregister_power_notifier; + + ub_mem_ras_handler_register(ub_mem_ras_handler); + pr_info("ubus notifier register successful\n"); + + return 0; + +unregister_power_notifier: + unregister_acpi_power_notifier(&acpi_power_notifier); + pr_info("power notifier unregistered\n"); +remove_proc_dir: + proc_remove(g_sentry_reporter_proc_dir); + pr_info("proc file removed\n"); + return ret; +} + +/** + * sentry_reporter_exit - Module cleanup function + */ +static void __exit sentry_reporter_exit(void) +{ + unregister_acpi_power_notifier(&acpi_power_notifier); + pr_info("power notifier unregistered\n"); + + unregister_reclaim_notifier(&lowmem_notifier); + pr_info("lowmem notifier unregistered\n"); + + ub_mem_ras_handler_unregister(); + pr_info("ub_mem notifier unregistered\n"); + + proc_remove(g_sentry_reporter_proc_dir); + pr_info("proc file removed\n"); +} + +module_init(sentry_reporter_init); +module_exit(sentry_reporter_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Luckky"); +MODULE_DESCRIPTION("sentry reporter: report kernel events to userspace"); +MODULE_VERSION("1.0"); diff --git a/drivers/ub/sentry/sentry_urma_comm.c b/drivers/ub/sentry/sentry_urma_comm.c new file mode 100644 index 0000000000000000000000000000000000000000..967a2f036541fb4ae30fb191936282632dc03cea --- /dev/null +++ b/drivers/ub/sentry/sentry_urma_comm.c @@ -0,0 +1,2313 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description: urma communication module + * Author: sxt1001 + * Create: 2025-03-18 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smh_common_type.h" + +static int heartbeat_thread(void *arg); +static int rebuild_tjetty(int idx, int die_index); +static int sentry_post_jetty_send_wr(const char *buf, size_t len, int tjetty_idx, int die_index); +static int sentry_poll_jfc(struct ubcore_jfc *jfc, int cr_cnt, struct ubcore_cr *cr, int die_index); + +#define PROC_DEVICE_PATH "sentry_urma_comm" +#define PROC_DEVICE_NAME "client_info" +#define PROC_HEARTBEAT_SWITCH "heartbeat" +#define ENABLE_VALUE_MAX_LEN 4 /* 'off' + '\n' */ +#define MAX_JFC_DEPTH 96 +#define MAX_JFR_DEPTH 96 +#define MAX_JFS_DEPTH 96 +#define MAX_SGE 1 +#define MIN_RNR_TIMER 17 /* timeout time is 2^17*4.096usec≈536ms */ +#define SGE_MAX_LEN 4096 +#define DEFAULT_INVALID_JETTY_ID (-1) +#define MIN_JETTY_ID 3 +#define MAX_JETTY_ID 1023 +#define JETTY_ID_MAX_LEN 6 +#define UVS_IPV4_MAP_IPV6_PREFIX 0x0000ffff +#define URMA_CNT_MAX_NUM (1U << 20) +#define HB_WAIT_ACK_SLEEP_MS 3000 +#define HEARTBEAT_INTERVAL_MS 60000 /* 60s */ +#define URMA_LOCK 1 +#define URMA_UNLOCK 0 +#define EID_PART_NUM 8 +#define CLIENT_INFO_MAX_LEN (((EID_MAX_LEN + 1) * MAX_NODE_NUM - 1) * 2 + 1 + 1 + \ + JETTY_ID_MAX_LEN + 1) +/* The maximum length of the server_eid content in client info */ +#define SERVER_EID_PART_MAX_LEN (((EID_MAX_LEN + 1) * MAX_NODE_NUM - 1) * 2 + 1 + 1) +#define SINGLE_SERVER_PART_LEN ((EID_MAX_LEN + 1) * MAX_NODE_NUM - 1 + 1) + +/* + * 32 * (EID_MAX_LEN + 1) + 32 (31 * ";" + 1 * " ") + jetty_id + '\n' + '\0' + + * "server_id:, client_jetty_id:" + */ +#define CLIENT_INFO_BUF_MAX_LEN ((MAX_NODE_NUM + 1) * (EID_MAX_LEN + 1) + JETTY_ID_MAX_LEN + 35) + +struct ubcore_dev_list { + struct ubcore_device *dev; + struct list_head list; +}; + +LIST_HEAD(ub_dev_list_head); + +static struct 
ubcore_jfc_cfg default_jfc_cfg = { + .depth = MAX_JFC_DEPTH, + .flag.bs.lock_free = false, + .flag.bs.jfc_inline = false, + .ceqn = 0, +}; + +static struct ubcore_jfr_cfg default_jfr_cfg = { + .depth = MAX_JFR_DEPTH, + .flag.bs.token_policy = UBCORE_TOKEN_NONE, + .flag.bs.lock_free = false, + .flag.bs.tag_matching = false, + .trans_mode = UBCORE_TP_RM, + .max_sge = MAX_SGE, + .min_rnr_timer = MIN_RNR_TIMER, +}; + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][urma]: " fmt + +struct sentry_ubcore_resource { + bool is_created; + + /* dev resource */ + struct ubcore_device *sentry_ubcore_dev; + struct ubcore_tjetty *tjetty[MAX_NODE_NUM]; + struct ubcore_jfs_wr jfs_wr[MAX_NODE_NUM]; + struct ubcore_jfr_wr jfr_wr[MAX_NODE_NUM]; + struct ubcore_sge s_sge[MAX_NODE_NUM]; + struct ubcore_sge r_sge[MAX_NODE_NUM]; + struct ubcore_jetty *jetty; + struct ubcore_jfc *sender_jfc; + struct ubcore_jfc *receiver_jfc; + struct ubcore_jfr *jetty_jfr; + struct ubcore_target_seg *s_seg; + struct ubcore_target_seg *r_seg; + void *s_seg_va; + void *r_seg_va; + + /* eid info */ + union ubcore_eid local_eid; + union ubcore_eid server_eid[MAX_NODE_NUM]; + char server_eid_array[MAX_NODE_NUM][EID_MAX_LEN]; + int server_eid_valid_num; + uint32_t eid_index; + + /* cnt for retry */ + atomic_t send_cnt[MAX_NODE_NUM]; + atomic_t remote_recv_cnt[MAX_NODE_NUM]; + atomic_t urma_hb_ack_list[MAX_NODE_NUM]; /* 0 = down, 1 = up */ +}; + +struct sentry_urma_context { + /* Heartbeat threads and state */ + struct task_struct *hb_thread; + bool heartbeat_enable; + + uint32_t client_jetty_id; + int local_eid_num_configured; + int server_eid_num_configured; + bool is_panic_mode; + + char *kbuf; /* server_buf client_jetty_id */ + char *server_buf_part; + char *client_jetty_id_part; + char *client_info_buf; /* for proc_read */ + bool is_valid_client_info; + + struct ubcore_cr *update_recv_cnt_cr; + struct ubcore_cr *heartbeat_thread_cr; + struct ubcore_cr *urma_recv_cr; + struct ubcore_cr *urma_recv_sender_cr; + + bool is_register_ubcore_client; + + struct proc_dir_entry *proc_dir; +}; + +static DEFINE_MUTEX(sentry_urma_mutex); +static struct sentry_ubcore_resource sentry_urma_dev[MAX_DIE_NUM]; +static struct sentry_urma_context sentry_urma_ctx; + +bool g_is_created_ubcore_resource; +EXPORT_SYMBOL(g_is_created_ubcore_resource); + +/** + * urma_mutex_lock_op - Lock or unlock the URMA mutex based on panic mode + * @is_to_lock: URMA_LOCK to lock, URMA_UNLOCK to unlock + * + * This function handles mutex locking/unlocking only when not in panic mode + * to avoid deadlocks during system panic. + */ +static void urma_mutex_lock_op(int is_to_lock) +{ + if (!sentry_urma_ctx.is_panic_mode) { + if (is_to_lock) + mutex_lock(&sentry_urma_mutex); + else + mutex_unlock(&sentry_urma_mutex); + } +} + +/** + * swap_eid_byteorder - Swap byte order of EID + * @dst: Destination EID buffer + * @src: Source EID buffer + * + * This function swaps the byte order of EID from big-endian to little-endian. 
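+ *
+ * Example: an EID whose raw bytes are { 0x00, 0x01, ..., UBCORE_EID_SIZE - 1 }
+ * comes out reversed as { UBCORE_EID_SIZE - 1, ..., 0x01, 0x00 }; the helper
+ * is a plain byte reversal and makes no other interpretation of the EID.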
+ */ +static inline void swap_eid_byteorder(uint8_t dst[UBCORE_EID_SIZE], + const uint8_t src[UBCORE_EID_SIZE]) +{ + int i; + + for (i = 0; i < UBCORE_EID_SIZE; i++) + dst[i] = src[UBCORE_EID_SIZE - 1 - i]; +} + +/** + * compare_ubcore_eid - Compare two URMA EIDs with byte order handling + * @src_eid: Source EID to compare + * @dst_eid: Destination EID to compare against + * + * Return: 0 if EIDs match, -EINVAL if they don't match even after byte order swap + * + * This function compares two EIDs and handles potential byte order differences + * by attempting a byte-swapped comparison if the initial comparison fails. + */ +static int compare_ubcore_eid(const union ubcore_eid src_eid, + const union ubcore_eid dst_eid) +{ + if (memcmp(&src_eid, &dst_eid, sizeof(union ubcore_eid)) == 0) + return 0; + + /* + * The byte order of the saved data may differ; + * compare again after conversion. + */ + union ubcore_eid new_src_eid; + + swap_eid_byteorder(new_src_eid.raw, src_eid.raw); + if (memcmp(&new_src_eid, &dst_eid, sizeof(union ubcore_eid)) == 0) { + pr_info("change byte order to match success, src eid:%llx, %x, %x, new src eid: %llx, %x, %x\n", + src_eid.in4.reserved, src_eid.in4.prefix, src_eid.in4.addr, + new_src_eid.in4.reserved, new_src_eid.in4.prefix, + new_src_eid.in4.addr); + return 0; + } + return -EINVAL; +} + + +/** + * unimport_tjetty - Unimport all target jetties for a specific die + * @die_index: Index of the die to unimport jetties from + * + * Return: 0 on success, -EINVAL on invalid die_index + * + * This function unimports all target jetties associated with a specific die + * index and cleans up the references. + */ +static int unimport_tjetty(int die_index) +{ + int i; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + for (i = 0; i < MAX_NODE_NUM; i++) { + if (sentry_urma_dev[die_index].tjetty[i]) { + ubcore_unimport_jetty(sentry_urma_dev[die_index].tjetty[i]); + sentry_urma_dev[die_index].tjetty[i] = NULL; + } + } + + return 0; +} + + +static void release_urma_dev_source(int die_index) +{ + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_info("urma %d dev is not exist, ignore to release the urma source.\n", die_index); + return; + } + + unimport_tjetty(die_index); + + if (sentry_urma_dev[die_index].jetty) { + ubcore_delete_jetty(sentry_urma_dev[die_index].jetty); + sentry_urma_dev[die_index].jetty = NULL; + } + + if (sentry_urma_dev[die_index].s_seg) { + ubcore_unregister_seg(sentry_urma_dev[die_index].s_seg); + sentry_urma_dev[die_index].s_seg = NULL; + kfree(sentry_urma_dev[die_index].s_seg_va); + sentry_urma_dev[die_index].s_seg_va = NULL; + } + + if (sentry_urma_dev[die_index].r_seg) { + ubcore_unregister_seg(sentry_urma_dev[die_index].r_seg); + sentry_urma_dev[die_index].r_seg = NULL; + kfree(sentry_urma_dev[die_index].r_seg_va); + sentry_urma_dev[die_index].r_seg_va = NULL; + } + + if (sentry_urma_dev[die_index].jetty_jfr) { + ubcore_delete_jfr(sentry_urma_dev[die_index].jetty_jfr); + sentry_urma_dev[die_index].jetty_jfr = NULL; + } + + if (sentry_urma_dev[die_index].receiver_jfc) { + ubcore_delete_jfc(sentry_urma_dev[die_index].receiver_jfc); + sentry_urma_dev[die_index].receiver_jfc = NULL; + } + + if (sentry_urma_dev[die_index].sender_jfc) { + ubcore_delete_jfc(sentry_urma_dev[die_index].sender_jfc); + sentry_urma_dev[die_index].sender_jfc = NULL; + } + + sentry_urma_dev[die_index].sentry_ubcore_dev = NULL; + 
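+ /* Reset per-die bookkeeping (is_created, cached EIDs) so the die can be reconfigured later. */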
sentry_urma_dev[die_index].is_created = false; + + sentry_urma_dev[die_index].server_eid_valid_num = 0; + memset(&sentry_urma_dev[die_index].local_eid, 0, sizeof(sentry_urma_dev[die_index].local_eid)); + memset(sentry_urma_dev[die_index].server_eid, 0, sizeof(sentry_urma_dev[die_index].server_eid)); + memset(sentry_urma_dev[die_index].server_eid_array, 0, MAX_NODE_NUM * EID_MAX_LEN * sizeof(char)); +} + +/** + * sentry_add_device - Add URMA device to the device list + * @dev: URMA device to add + * + * Return: 0 on success, -ENOMEM on memory allocation failure + * + * This function allocates and initializes a device node and adds it to + * the global URMA device list. + */ +static int sentry_add_device(struct ubcore_device *dev) +{ + struct ubcore_dev_list *dev_node; + + dev_node = kmalloc(sizeof(*dev_node), GFP_KERNEL); + if (!dev_node) { + pr_err("failed to allocate dev node\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&dev_node->list); + dev_node->dev = dev; + list_add_tail(&dev_node->list, &ub_dev_list_head); + + return 0; +} + +/** + * sentry_remove_device - Remove URMA device from the device list + * @dev: URMA device to remove + * @d: Unused parameter + * + * This function searches for the specified device in the global list + * and removes it, freeing the associated memory. + */ +static void sentry_remove_device(struct ubcore_device *dev, void *d __always_unused) +{ + struct ubcore_dev_list *dev_node; + int die_index = 0; + + urma_mutex_lock_op(URMA_LOCK); + list_for_each_entry(dev_node, &ub_dev_list_head, list) { + if (dev_node->dev == dev) { + for (die_index = 0; die_index < MAX_DIE_NUM; die_index++) { + if (sentry_urma_dev[die_index].sentry_ubcore_dev == dev) { + pr_info("release the urma %d dev before remove the urma device\n", die_index); + release_urma_dev_source(die_index); + break; + } + } + list_del(&dev_node->list); + kfree(dev_node); + break; + } + } + urma_mutex_lock_op(URMA_UNLOCK); +} + +static struct ubcore_client sentry_ubcore_client = { + .list_node = LIST_HEAD_INIT(sentry_ubcore_client.list_node), + .client_name = "sentry_ubcore_client", + .add = sentry_add_device, + .remove = sentry_remove_device, +}; + +/** + * free_global_char - Free all dynamically allocated global character buffers + * + * This function safely frees all global character buffers used in the module + * and sets the pointers to NULL to prevent use-after-free. + */ +void free_global_char(void) +{ + kfree(sentry_urma_ctx.kbuf); + sentry_urma_ctx.kbuf = NULL; + + kfree(sentry_urma_ctx.server_buf_part); + sentry_urma_ctx.server_buf_part = NULL; + + kfree(sentry_urma_ctx.client_jetty_id_part); + sentry_urma_ctx.client_jetty_id_part = NULL; + + kfree(sentry_urma_ctx.client_info_buf); + sentry_urma_ctx.client_info_buf = NULL; + + kfree(sentry_urma_ctx.update_recv_cnt_cr); + sentry_urma_ctx.update_recv_cnt_cr = NULL; + + kfree(sentry_urma_ctx.heartbeat_thread_cr); + sentry_urma_ctx.heartbeat_thread_cr = NULL; + + kfree(sentry_urma_ctx.urma_recv_cr); + sentry_urma_ctx.urma_recv_cr = NULL; + + kfree(sentry_urma_ctx.urma_recv_sender_cr); + sentry_urma_ctx.urma_recv_sender_cr = NULL; +} + + +/** + * init_global_char - Initialize global character buffers + * + * Return: 0 on success, -ENOMEM on allocation failure + * + * This function allocates and initializes all global character buffers + * used for client information storage and communication. 
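+ *
+ * Pair every successful call with free_global_char(); a minimal usage
+ * sketch is:
+ *
+ *   ret = init_global_char();
+ *   if (ret)
+ *           return ret;
+ *   ...
+ *   free_global_char();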
+ */ +int init_global_char(void) +{ + sentry_urma_ctx.kbuf = kzalloc(CLIENT_INFO_MAX_LEN, GFP_KERNEL); + if (!sentry_urma_ctx.kbuf) { + pr_err("kzalloc kbuf failed\n"); + goto err_free; + } + + sentry_urma_ctx.server_buf_part = kzalloc(SERVER_EID_PART_MAX_LEN, GFP_KERNEL); + if (!sentry_urma_ctx.server_buf_part) { + pr_err("kzalloc server_buf_part failed\n"); + goto err_free; + } + + sentry_urma_ctx.client_jetty_id_part = kzalloc(JETTY_ID_MAX_LEN, GFP_KERNEL); + if (!sentry_urma_ctx.client_jetty_id_part) { + pr_err("kzalloc client_jetty_id_part failed\n"); + goto err_free; + } + + sentry_urma_ctx.client_info_buf = kzalloc(CLIENT_INFO_BUF_MAX_LEN, GFP_KERNEL); + if (!sentry_urma_ctx.client_info_buf) { + pr_err("kzalloc client_info_buf failed\n"); + goto err_free; + } + + sentry_urma_ctx.update_recv_cnt_cr = kzalloc(sizeof(struct ubcore_cr) * MAX_NODE_NUM, GFP_KERNEL); + if (!sentry_urma_ctx.update_recv_cnt_cr) { + pr_err("kzalloc update_recv_cnt_cr failed\n"); + goto err_free; + } + sentry_urma_ctx.heartbeat_thread_cr = kzalloc(sizeof(struct ubcore_cr) * MAX_NODE_NUM, GFP_KERNEL); + if (!sentry_urma_ctx.heartbeat_thread_cr) { + pr_err("kzalloc heartbeat_thread_cr failed\n"); + goto err_free; + } + sentry_urma_ctx.urma_recv_cr = kzalloc(sizeof(struct ubcore_cr) * MAX_NODE_NUM, GFP_KERNEL); + if (!sentry_urma_ctx.urma_recv_cr) { + pr_err("kzalloc urma_recv_cr failed\n"); + goto err_free; + } + sentry_urma_ctx.urma_recv_sender_cr = kzalloc(sizeof(struct ubcore_cr) * MAX_NODE_NUM, GFP_KERNEL); + if (!sentry_urma_ctx.urma_recv_sender_cr) { + pr_err("kzalloc urma_recv_sender_cr failed\n"); + goto err_free; + } + + return 0; + +err_free: + free_global_char(); + return -ENOMEM; +} + +/** + * init_ubcore - Initialize URMA core functionality + * + * Return: 0 on success, appropriate error code on failure + * + * This function registers the URMA client and verifies that at least one + * URMA device is available. It handles the initialization of URMA core + * components. + */ +int init_ubcore(void) +{ + int ret; + + if (!list_empty(&ub_dev_list_head)) { + pr_err("hw_clear is already setup\n"); + return -EEXIST; + } + + ret = ubcore_register_client(&sentry_ubcore_client); + if (ret) { + pr_err("fail to register ubcore client\n"); + return -EFAULT; + } + + sentry_urma_ctx.is_register_ubcore_client = true; + pr_info("ubcore_register_client success\n"); + + if (list_empty(&ub_dev_list_head)) { + pr_err("fail to get ubcore device\n"); + ret = -ENODEV; + goto init_ubcore_fail; + } + + return 0; + +init_ubcore_fail: + ubcore_unregister_client(&sentry_ubcore_client); + sentry_urma_ctx.is_register_ubcore_client = false; + return ret; +} + + +/** + * release_ubcore_resource - Release all URMA resources for all dies + * + * This function stops the heartbeat thread and releases all URMA resources + * including jetties, segments, JFRs, and JFCs for all die indices. + * It handles resource cleanup in the proper order to avoid dependency issues. 
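+ *
+ * Teardown order: the heartbeat thread is stopped first so nothing keeps
+ * posting work, g_is_created_ubcore_resource is cleared, and then
+ * release_urma_dev_source() walks every die and releases imports, jetty,
+ * segments, JFR and JFCs roughly in the reverse of their creation order.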
+ */ +static void release_ubcore_resource(void) +{ + int die_index; + + urma_mutex_lock_op(URMA_LOCK); + + if (sentry_urma_ctx.hb_thread) { + kthread_stop(sentry_urma_ctx.hb_thread); + sentry_urma_ctx.hb_thread = NULL; + pr_info("urma_hb_all thread stopped\n"); + } + + g_is_created_ubcore_resource = false; + + /* Release resources for each die */ + for (die_index = 0; die_index < MAX_DIE_NUM; die_index++) { + release_urma_dev_source(die_index); + } + + urma_mutex_lock_op(URMA_UNLOCK); +} + +/** + * release_all_resource - Release all URMA resources and unregister client + * + * This function cleans up all allocated URMA resources including device + * resources and unregisters the URMA client if it was registered. + */ +static void release_all_resource(void) +{ + release_ubcore_resource(); + + if (sentry_urma_ctx.is_register_ubcore_client) { + ubcore_unregister_client(&sentry_ubcore_client); + sentry_urma_ctx.is_register_ubcore_client = false; + } +} + +/** + * str_to_eid - Convert string representation to URMA EID + * @eid_str: String representation of EID + * @eid: Pointer to store converted EID + * + * Return: 0 on success, -EINVAL on invalid input + * + * This function converts a string representation of an EID to the binary + * format used by URMA, supporting IPv6 notation. + */ +int str_to_eid(const char *eid_str, union ubcore_eid *eid) +{ + if (strlen(eid_str) != EID_MAX_LEN - 1) { + pr_err("eid str %s len is invalid, failed to transfer\n", eid_str); + return -EINVAL; + } + + if (in6_pton(eid_str, EID_MAX_LEN, (u8 *)eid, '\0', NULL) > 0) { + pr_info("parse eid success, config eid: %llx, %x, %x\n", + eid->in4.reserved, eid->in4.prefix, eid->in4.addr); + return 0; + } + + pr_err("parse eid string [%s] failed\n", eid_str); + return -EINVAL; +} +EXPORT_SYMBOL(str_to_eid); + +/** + * set_urma_panic_mode - Set URMA panic mode status + * @is_panic: true to in panic mode, false to otherwise + * + * This function sets the panic mode flag which affects mutex locking + * behavior during system panic conditions. + */ +void set_urma_panic_mode(bool is_panic) +{ + sentry_urma_ctx.is_panic_mode = is_panic; +} +EXPORT_SYMBOL(set_urma_panic_mode); + +/** + * sentry_register_seg - Register a segment for URMA operations + * @dev: URMA device to register segment with + * @num_sge: Number of scatter-gather elements + * @is_send: true for send segment, false for receive segment + * @die_index: Index of the die for resource tracking + * + * Return: Pointer to registered segment on success, ERR_PTR on failure + * + * This function registers a memory segment with the URMA device for + * send or receive operations. 
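+ *
+ * The segment is backed by SGE_MAX_LEN * @num_sge bytes of kzalloc()ed
+ * memory (callers pass MAX_NODE_NUM, giving one SGE_MAX_LEN slot per node);
+ * the buffer is remembered in s_seg_va or r_seg_va so that
+ * release_urma_dev_source() can free it together with the segment.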
+ */ +static struct ubcore_target_seg *sentry_register_seg(struct ubcore_device *dev, + uint32_t num_sge, bool is_send, + int die_index) +{ + union ubcore_reg_seg_flag flag = {0}; + uint64_t seg_len = SGE_MAX_LEN * num_sge; + struct ubcore_seg_cfg cfg = {0}; + struct ubcore_target_seg *ret; + void *seg_va; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return ERR_PTR(-EINVAL); + } + + seg_va = kzalloc(seg_len, GFP_KERNEL); + if (!seg_va) + return ERR_PTR(-ENOMEM); + + flag.bs.token_policy = UBCORE_TOKEN_NONE; + flag.bs.cacheable = UBCORE_NON_CACHEABLE; + flag.bs.access = UBCORE_ACCESS_LOCAL_ONLY; + cfg.va = (uint64_t)seg_va; + cfg.len = seg_len; + cfg.flag = flag; + + ret = ubcore_register_seg(dev, &cfg, NULL); + if (IS_ERR_OR_NULL(ret)) { + pr_err("reg seg failed\n"); + goto free_seg; + } + + if (is_send) + sentry_urma_dev[die_index].s_seg_va = seg_va; + else + sentry_urma_dev[die_index].r_seg_va = seg_va; + + return ret; + +free_seg: + kfree(seg_va); + return ret; +} + +/** + * sentry_create_jetty - Create a URMA jetty endpoint + * @device: URMA device to create jetty on + * @jfc_s: Send completion queue + * @jfc_r: Receive completion queue + * @jfr: Receive work queue + * @jetty_id: Jetty identifier + * + * Return: Pointer to created jetty on success, NULL on failure + * + * This function creates a jetty endpoint with the specified configuration + * for URMA communication. + */ +static struct ubcore_jetty *sentry_create_jetty(struct ubcore_device *device, + struct ubcore_jfc *jfc_s, + struct ubcore_jfc *jfc_r, + struct ubcore_jfr *jfr, + uint32_t jetty_id) +{ + struct ubcore_jetty_cfg jetty_cfg = { + .id = jetty_id, + .flag.bs.share_jfr = 1, + .trans_mode = UBCORE_TP_RM, + .eid_index = 0, + .jfs_depth = MAX_JFS_DEPTH, + .priority = 0, /* Highest priority */ + .max_send_sge = 1, + .max_send_rsge = 1, + .jfr_depth = MAX_JFR_DEPTH, + .max_recv_sge = 1, + .send_jfc = jfc_s, + .recv_jfc = jfc_r, + .jfr = jfr, + }; + + return ubcore_create_jetty(device, &jetty_cfg, NULL, NULL); +} + +/** + * sentry_post_recv - Post a receive work request to a jetty + * @r_jetty: Receive jetty to post to + * @recv_seg: Receive segment to use + * @node_idx: Node index for scatter-gather element + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure + * + * This function posts a receive work request to the specified jetty + * for asynchronous data reception. 
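+ *
+ * Typical use (see import() later in this file) posts one buffer per peer:
+ *
+ *   for (i = 0; i < MAX_NODE_NUM; i++)
+ *           sentry_post_recv(jetty, r_seg, i, die_index);
+ *
+ * Each request points at r_seg_va + SGE_MAX_LEN * @node_idx and records that
+ * address in user_ctx, so a completion can be traced back to its slot.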
+ */ +int sentry_post_recv(struct ubcore_jetty *r_jetty, struct ubcore_target_seg *recv_seg, + int node_idx, int die_index) +{ + uint64_t sge_addr; + struct ubcore_jfr_wr *jfr_bad_wr = NULL; + int ret; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("%s failed: urma %d dev is not exist\n", __func__, die_index); + return -EINVAL; + } + + sge_addr = (uint64_t)sentry_urma_dev[die_index].r_seg_va + SGE_MAX_LEN * node_idx; + sentry_urma_dev[die_index].r_sge[node_idx].addr = sge_addr; + sentry_urma_dev[die_index].r_sge[node_idx].len = SGE_MAX_LEN; + sentry_urma_dev[die_index].r_sge[node_idx].tseg = recv_seg; + sentry_urma_dev[die_index].jfr_wr[node_idx].src.sge = + &sentry_urma_dev[die_index].r_sge[node_idx]; + sentry_urma_dev[die_index].jfr_wr[node_idx].src.num_sge = 1; + sentry_urma_dev[die_index].jfr_wr[node_idx].user_ctx = sge_addr; + + ret = ubcore_post_jetty_recv_wr(r_jetty, + &sentry_urma_dev[die_index].jfr_wr[node_idx], + &jfr_bad_wr); + if (ret != 0 && ret != -ENOMEM) { + pr_err("sentry_post_recv: ubcore_post_jetty_recv_wr failed, ret %d\n", ret); + return ret; + } + + return 0; +} + +/** + * create_ubcore_resource - Create URMA core resources for a specific die + * @die_index: Index of the die to create resources for + * + * Return: 0 on success, negative error code on failure + * + * This function creates all necessary URMA resources including JFCs, JFRs, + * segments, and jetties for the specified die index. + */ +static int create_ubcore_resource(int die_index) +{ + int ret; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + urma_mutex_lock_op(URMA_LOCK); + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + urma_mutex_lock_op(URMA_UNLOCK); + pr_err("Please set eid first\n"); + return -EINVAL; + } + + /* Create sender JFC */ + sentry_urma_dev[die_index].sender_jfc = + ubcore_create_jfc(sentry_urma_dev[die_index].sentry_ubcore_dev, + &default_jfc_cfg, NULL, NULL, NULL); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].sender_jfc)) { + pr_err("ubcore_create_jfc err\n"); + sentry_urma_dev[die_index].sender_jfc = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + + ret = ubcore_rearm_jfc(sentry_urma_dev[die_index].sender_jfc, false); + if (ret != 0) { + pr_err("rearm jfc_r failed, ret %d\n", ret); + goto err_create_urma_resource; + } + pr_info("ubcore_create_jfc success\n"); + + /* Create receiver JFC */ + sentry_urma_dev[die_index].receiver_jfc = + ubcore_create_jfc(sentry_urma_dev[die_index].sentry_ubcore_dev, + &default_jfc_cfg, NULL, NULL, NULL); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].receiver_jfc)) { + pr_err("ubcore_create_jfc err\n"); + sentry_urma_dev[die_index].receiver_jfc = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + + ret = ubcore_rearm_jfc(sentry_urma_dev[die_index].receiver_jfc, false); + if (ret != 0) { + pr_err("rearm jfc_r failed, ret %d\n", ret); + goto err_create_urma_resource; + } + pr_info("ubcore_create_jfc success\n"); + + /* Create JFR */ + default_jfr_cfg.eid_index = sentry_urma_dev[die_index].eid_index; + default_jfr_cfg.jfc = sentry_urma_dev[die_index].receiver_jfc; + sentry_urma_dev[die_index].jetty_jfr = + ubcore_create_jfr(sentry_urma_dev[die_index].sentry_ubcore_dev, + &default_jfr_cfg, NULL, NULL); + if 
(IS_ERR_OR_NULL(sentry_urma_dev[die_index].jetty_jfr)) { + pr_err("ubcore_create_jfr err\n"); + sentry_urma_dev[die_index].jetty_jfr = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + pr_info("ubcore_create_jfr success\n"); + + /* Register send segment */ + sentry_urma_dev[die_index].s_seg = + sentry_register_seg(sentry_urma_dev[die_index].sentry_ubcore_dev, + MAX_NODE_NUM, true, die_index); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].s_seg)) { + pr_err("ubcore_register_s_seg err\n"); + sentry_urma_dev[die_index].s_seg = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + + /* Register receive segment */ + sentry_urma_dev[die_index].r_seg = + sentry_register_seg(sentry_urma_dev[die_index].sentry_ubcore_dev, + MAX_NODE_NUM, false, die_index); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].r_seg)) { + pr_err("ubcore_register_r_seg err\n"); + sentry_urma_dev[die_index].r_seg = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + + sentry_urma_dev[die_index].is_created = true; + pr_info("ubcore_register_seg success\n"); + urma_mutex_lock_op(URMA_UNLOCK); + + return 0; + +err_create_urma_resource: + urma_mutex_lock_op(URMA_UNLOCK); + release_ubcore_resource(); + return ret; +} + +/** + * create_tjetty - Create a target jetty for remote communication + * @tjetty_cfg: Target jetty configuration + * @eid_index: EID index for the target + * @die_index: Die index for resource access + * + * Return: Pointer to created target jetty on success, NULL on failure + * + * This function creates a target jetty for communication with a remote + * endpoint specified by the EID index. + */ +static struct ubcore_tjetty *create_tjetty(struct ubcore_tjetty_cfg *tjetty_cfg, + int eid_index, int die_index) +{ + int ret; + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("%s failed: urma %d dev is not exist\n", __func__, die_index); + return NULL; + } + + struct ubcore_get_tp_cfg tp_cfg = { + .flag.bs.ctp = 1, + .trans_mode = UBCORE_TP_RM, + .local_eid = sentry_urma_dev[die_index].local_eid, + .peer_eid = sentry_urma_dev[die_index].server_eid[eid_index], + }; + uint32_t tp_cnt = 1; + struct ubcore_tp_info tp_list = {}; + struct ubcore_active_tp_cfg active_tp_cfg = {}; + + ret = ubcore_get_tp_list(sentry_urma_dev[die_index].sentry_ubcore_dev, + &tp_cfg, &tp_cnt, &tp_list, NULL); + if (ret != 0) { + pr_err("ubcore_get_tp_list failed, ret %d, server eid %s\n", + ret, sentry_urma_dev[die_index].server_eid_array[eid_index]); + return NULL; + } + + active_tp_cfg.tp_handle = tp_list.tp_handle; + return ubcore_import_jetty_ex(sentry_urma_dev[die_index].sentry_ubcore_dev, + tjetty_cfg, &active_tp_cfg, NULL); +} + +/** + * import - Import and configure URMA jetties for all dies + * + * Return: 0 on success, -EFAULT on failure + * + * This function imports and configures URMA jetties for all configured dies, + * creates local jetties, posts receive work requests, and starts the heartbeat + * thread if enabled. It handles the complete initialization of URMA communication + * endpoints. 
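+ *
+ * Expected call order, inferred from the checks in this function (summary
+ * only): sentry_create_urma_resource() matches devices and creates the
+ * JFC/JFR/segment resources, the client info proc write then supplies the
+ * server EIDs and client_jetty_id, and finally import() creates the local
+ * jetty, posts the receive work requests and imports the remote tjetties.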
+ */ +int import(void) +{ + struct ubcore_tjetty_cfg tjetty_cfg = {0}; + int ret = 0; + int die_index; + int tjetty_valid_num; + + if (sentry_urma_ctx.client_jetty_id == DEFAULT_INVALID_JETTY_ID) { + pr_err("client_jetty_id not set, import failed\n"); + return -EFAULT; + } + + urma_mutex_lock_op(URMA_LOCK); + + g_is_created_ubcore_resource = false; + + /* Stop existing heartbeat thread */ + if (sentry_urma_ctx.hb_thread) { + kthread_stop(sentry_urma_ctx.hb_thread); + sentry_urma_ctx.hb_thread = NULL; + pr_info("urma_hb_all thread stopped\n"); + } + + /* Configure target jetty */ + tjetty_cfg.id.id = sentry_urma_ctx.client_jetty_id; + tjetty_cfg.flag.bs.token_policy = UBCORE_TOKEN_NONE; + tjetty_cfg.trans_mode = UBCORE_TP_RM; + tjetty_cfg.type = UBCORE_JETTY; + + /* Process each die */ + for (die_index = 0; die_index < sentry_urma_ctx.server_eid_num_configured; die_index++) { + int i; + + tjetty_valid_num = 0; + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("Please set eid first\n"); + goto print_import_result; + } + + /* Clean existing jetties */ + unimport_tjetty(die_index); + if (sentry_urma_dev[die_index].jetty) { + ubcore_delete_jetty(sentry_urma_dev[die_index].jetty); + sentry_urma_dev[die_index].jetty = NULL; + } + + /* Create local jetty */ + sentry_urma_dev[die_index].jetty = + sentry_create_jetty(sentry_urma_dev[die_index].sentry_ubcore_dev, + sentry_urma_dev[die_index].sender_jfc, + sentry_urma_dev[die_index].receiver_jfc, + sentry_urma_dev[die_index].jetty_jfr, + sentry_urma_ctx.client_jetty_id); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].jetty)) { + sentry_urma_dev[die_index].jetty = NULL; + pr_err("ubcore_create_jetty failed for device %s\n", + sentry_urma_dev[die_index].sentry_ubcore_dev->dev_name); + goto print_import_result; + } + pr_info("ubcore_create_jetty success for device %s\n", + sentry_urma_dev[die_index].sentry_ubcore_dev->dev_name); + + /* Post receive work requests */ + for (i = 0; i < MAX_NODE_NUM; i++) { + ret = sentry_post_recv(sentry_urma_dev[die_index].jetty, + sentry_urma_dev[die_index].r_seg, i, die_index); + if (ret != 0) { + pr_err("No. 
%u post recv failed, device %s ret %d\n", i, + sentry_urma_dev[die_index].sentry_ubcore_dev->dev_name, ret); + ubcore_delete_jetty(sentry_urma_dev[die_index].jetty); + sentry_urma_dev[die_index].jetty = NULL; + goto print_import_result; + } + } + + g_is_created_ubcore_resource = true; + + /* Import target jetties for remote servers (skip local EID at index 0) */ + for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) { + tjetty_cfg.id.eid = sentry_urma_dev[die_index].server_eid[i]; + sentry_urma_dev[die_index].tjetty[i] = + create_tjetty(&tjetty_cfg, i, die_index); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].tjetty[i])) { + pr_warn("ubcore_import_jetty_ex err, server eid %s\n", + sentry_urma_dev[die_index].server_eid_array[i]); + sentry_urma_dev[die_index].tjetty[i] = NULL; + continue; + } + tjetty_valid_num++; + } + +print_import_result: + pr_info("import: %d/%d success for device %s\n", + tjetty_valid_num, + sentry_urma_dev[die_index].server_eid_valid_num - 1, /* Exclude local EID */ + sentry_urma_dev[die_index].sentry_ubcore_dev->dev_name); + } + + /* Start heartbeat thread if enabled */ + if (sentry_urma_ctx.heartbeat_enable) { + sentry_urma_ctx.hb_thread = kthread_run(heartbeat_thread, NULL, "urma_hb_all"); + if (IS_ERR(sentry_urma_ctx.hb_thread)) { + pr_err("failed to start heartbeat thread\n"); + sentry_urma_ctx.hb_thread = NULL; + } else { + pr_info("urma_hb_all thread start success\n"); + } + } + + urma_mutex_lock_op(URMA_UNLOCK); + return g_is_created_ubcore_resource ? 0 : -EFAULT; +} + +/** + * match_dev_by_local_eid - Find URMA device matching the specified local EID + * @eid: Local EID to match + * @eid_index: Output parameter for EID index + * + * Return: Pointer to matching URMA device, NULL if not found + * + * This function searches through all registered URMA devices to find one + * that has an EID matching the specified local EID. + */ +static struct ubcore_device *match_dev_by_local_eid(const union ubcore_eid *eid, + uint32_t *eid_index) +{ + int cnt = 0; + struct ubcore_dev_list *dev_node; + + list_for_each_entry(dev_node, &ub_dev_list_head, list) { + struct ubcore_eid_info *eid_info = ubcore_get_eid_list(dev_node->dev, &cnt); + int i; + + if (IS_ERR_OR_NULL(eid_info)) { + pr_warn("ubcore_get_eid_list failed\n"); + continue; + } + + /* One device may have multiple EIDs */ + for (i = 0; i < cnt; i++) { + pr_info("eid_info->eid: %llx, %x, %x, try to match\n", + eid_info->eid.in4.reserved, eid_info->eid.in4.prefix, + eid_info->eid.in4.addr); + + if (compare_ubcore_eid(eid_info->eid, *eid) == 0) { + pr_info("Match device %s, use it to send/recv data\n", + dev_node->dev->dev_name); + *eid_index = eid_info->eid_index; + return dev_node->dev; + } + eid_info++; + } + } + + pr_err("Cannot find dev by eid: %llx, %x, %x\n", + eid->in4.reserved, eid->in4.prefix, eid->in4.addr); + return NULL; +} + +/** + * match_index_by_remote_ub_eid - Find node and die indices by remote EID + * @remote_eid: Remote EID to search for + * @node_index: Output parameter for node index + * @die_index: Input/Output parameter for die index + * + * Return: 0 on success, -EINVAL if not found + * + * This function searches for a remote EID across all configured dies and nodes. + * If die_index is -1 on input, it will be set to the found die index. + * If die_index is specified, it verifies consistency. 
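+ *
+ * Illustrative usage with die auto-detection (variable names are examples):
+ *
+ *	int node_idx = -1;
+ *	int die_idx = -1;
+ *
+ *	if (!match_index_by_remote_ub_eid(remote_eid, &node_idx, &die_idx))
+ *		pr_info("matched node %d on die %d\n", node_idx, die_idx);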
+ */ +int match_index_by_remote_ub_eid(union ubcore_eid remote_eid, int *node_index, int *die_index) +{ + int i, j; + + for (i = 0; i < sentry_urma_ctx.local_eid_num_configured; i++) { + if (!sentry_urma_dev[i].is_created) { + pr_err("invalid value for sentry_urma_dev[%d].is_created\n", i); + return -EINVAL; + } + + for (j = 0; j < sentry_urma_dev[i].server_eid_valid_num; j++) { + if (memcmp(&sentry_urma_dev[i].server_eid[j], &remote_eid, + sizeof(union ubcore_eid)) == 0) { + *node_index = j; + if (*die_index == -1) { + *die_index = i; + } else if (*die_index != i) { + pr_err("%s error, get die_index %d, input die_index %d\n", + __func__, i, *die_index); + return -1; + } + return 0; + } + } + } + + return -EINVAL; +} +EXPORT_SYMBOL(match_index_by_remote_ub_eid); + +/** + * sentry_create_urma_resource - Create URMA resources for specified EIDs + * @eid: Array of local EIDs to create resources for + * @eid_num: Number of EIDs in the array + * + * Return: 0 on success, negative error code on failure + * + * This function initializes URMA core, creates resources for each specified EID, + * and matches devices to the provided EIDs. It handles both initial setup and + * reconfiguration scenarios. + */ +int sentry_create_urma_resource(union ubcore_eid eid[], int eid_num) +{ + int ret; + int i; + + /* Prepare for new device matching by cleaning up old resources */ + release_all_resource(); + + ret = init_ubcore(); + if (ret) { + pr_err("ubcore init failed\n"); + return -EINVAL; + } + pr_info("ubcore init success\n"); + + /* Create resources for each EID */ + for (i = 0; i < eid_num; i++) { + sentry_urma_dev[i].sentry_ubcore_dev = + match_dev_by_local_eid(&eid[i], &sentry_urma_dev[i].eid_index); + if (IS_ERR_OR_NULL(sentry_urma_dev[i].sentry_ubcore_dev)) + return -EINVAL; + + /* Re-create new URMA resource (e.g., jfs/jfc/jfr/seg) */ + ret = create_ubcore_resource(i); + if (ret) { + pr_err("create_ubcore_resource failed for %llx, %x, %x\n", + eid[i].in4.reserved, eid[i].in4.prefix, eid[i].in4.addr); + release_ubcore_resource(); + return ret; + } + + /* Update URMA EID after successful resource creation */ + memcpy(&sentry_urma_dev[i].local_eid, &eid[i], sizeof(union ubcore_eid)); + } + + sentry_urma_ctx.local_eid_num_configured = eid_num; + return 0; +} +EXPORT_SYMBOL(sentry_create_urma_resource); + +/** + * format_client_info_show_str - Format client information for display + * + * This function formats the client information string for procfs display, + * including server EIDs and client jetty ID in a human-readable format. + */ +static void format_client_info_show_str(void) +{ + bool is_not_single_die = false; + char *p; + int i, j; + + /* Clean up old data */ + if (sentry_urma_ctx.client_info_buf && sentry_urma_ctx.is_valid_client_info) + memset(sentry_urma_ctx.client_info_buf, 0, CLIENT_INFO_BUF_MAX_LEN); + + if (sentry_urma_ctx.is_valid_client_info) { + p = sentry_urma_ctx.client_info_buf; + + for (i = 0; i < sentry_urma_ctx.local_eid_num_configured; i++) { + if (!sentry_urma_dev[i].is_created) { + pr_err("invalid value for sentry_urma_dev[%d].is_created\n", i); + break; + } + + if (is_not_single_die) + p += snprintf(p, CLIENT_INFO_BUF_MAX_LEN, "%s", ";"); + else + p += snprintf(p, CLIENT_INFO_BUF_MAX_LEN, "%s", "server_eid:"); + + for (j = 0; j < sentry_urma_dev[i].server_eid_valid_num; j++) { + p += snprintf(p, CLIENT_INFO_BUF_MAX_LEN - (p - sentry_urma_ctx.client_info_buf), + "%s%s", sentry_urma_dev[i].server_eid_array[j], + j != sentry_urma_dev[i].server_eid_valid_num - 1 ? 
"," : ""); + } + is_not_single_die = true; + } + + snprintf(p, CLIENT_INFO_BUF_MAX_LEN, ", client_jetty_id:%d\n", + sentry_urma_ctx.client_jetty_id); + } else { + snprintf(sentry_urma_ctx.client_info_buf, CLIENT_INFO_BUF_MAX_LEN, + "server_eid:%s, client_jetty_id:%d\n", "null", DEFAULT_INVALID_JETTY_ID); + } +} + +/** + * process_multi_eid_string - Process multiple EID strings from a buffer + * @eid_buf: Buffer containing EID strings + * @eid_array: Output array for EID strings + * @eid_tmp: Output array for parsed EIDs + * @sepstr: Separator string for tokenizing + * @eid_max_num: Maximum number of EIDs to process + * + * Return: Number of EIDs processed on success, negative error code on failure + * + * This function parses a buffer containing multiple EID strings separated by + * the specified separator and converts them to binary EID format. + */ +int process_multi_eid_string(char *eid_buf, char eid_array[][EID_MAX_LEN], + union ubcore_eid eid_tmp[], const char *sepstr, int eid_max_num) +{ + int ret; + int eid_num = 0; + char *eid_part; + + while ((eid_part = strsep(&eid_buf, sepstr)) != NULL) { + if (eid_num >= eid_max_num) { + pr_err("Invalid eid format: max num %d, current input exceeds\n", + eid_max_num); + return -EINVAL; + } + + if (strlen(eid_part) > EID_MAX_LEN) { + pr_err("Invalid eid format: str too long: %s\n", eid_part); + return -EINVAL; + } + + ret = str_to_eid(eid_part, &eid_tmp[eid_num]); + if (ret) { + pr_err("Invalid eid format: eid str %s\n", eid_part); + return -EINVAL; + } + + memcpy(eid_array[eid_num], eid_part, EID_MAX_LEN); + eid_num++; + } + + return eid_num; +} +EXPORT_SYMBOL(process_multi_eid_string); + +/** + * process_server_eid_str - Process server EID string for multiple dies + * @server_buf: Buffer containing server EID strings + * @server_ub_eid_tmp: Output array for parsed server EIDs + * @server_eid_valid_num: Output array for valid EID counts per die + * + * Return: 0 on success, negative error code on failure + * + * This function processes server EID strings for multiple dies, validating + * that local EIDs match the configured values. 
+ */ +static int process_server_eid_str(char *server_buf, + union ubcore_eid server_ub_eid_tmp[MAX_DIE_NUM][MAX_NODE_NUM], + int *server_eid_valid_num) +{ + int ret; + int die_index = 0; + char *single_server_eid_part; + + while ((single_server_eid_part = strsep(&server_buf, ";")) != NULL) { + if (die_index >= MAX_DIE_NUM) { + pr_err("Invalid eid format: max num %d, current input exceeds\n", + MAX_DIE_NUM); + return -EINVAL; + } + + if (strlen(single_server_eid_part) > SINGLE_SERVER_PART_LEN) { + pr_err("Invalid server eid format: str too long: %s\n", + single_server_eid_part); + return -EINVAL; + } + + ret = process_multi_eid_string(single_server_eid_part, + sentry_urma_dev[die_index].server_eid_array, + server_ub_eid_tmp[die_index], ",", MAX_NODE_NUM); + if (ret < 0) + return ret; + + server_eid_valid_num[die_index] = ret; + + /* Verify local EID in server EID matches configured EID */ + if (memcmp(&server_ub_eid_tmp[die_index][0], + &sentry_urma_dev[die_index].local_eid, + sizeof(union ubcore_eid)) != 0) { + pr_err("Error: local eid in server eid %llx%llx does not match configured eid %llx%llx\n", + server_ub_eid_tmp[die_index][0].in6.subnet_prefix, + server_ub_eid_tmp[die_index][0].in6.interface_id, + sentry_urma_dev[die_index].local_eid.in6.subnet_prefix, + sentry_urma_dev[die_index].local_eid.in6.interface_id); + return -EINVAL; + } + die_index++; + } + + return 0; +} + +/** + * proc_client_info_write - Write handler for client info proc file + * @file: proc file pointer + * @user_buf: user space buffer + * @count: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + * + * This function processes client information input from userspace, including + * server EIDs and client jetty ID, and configures the URMA resources accordingly. + */ +static ssize_t proc_client_info_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int n = 0; + int ret; + union ubcore_eid server_ub_eid_tmp[MAX_DIE_NUM][MAX_NODE_NUM]; + int server_eid_valid_num[MAX_DIE_NUM] = {0}; + uint32_t client_jetty_id; + int i; + + if (count > CLIENT_INFO_MAX_LEN - 1) { + pr_err("invalid server eid info, max len %d, actual %lu\n", + CLIENT_INFO_MAX_LEN - 1, count); + return -EINVAL; + } + + if (copy_from_user(sentry_urma_ctx.kbuf, user_buf, count)) { + pr_err("failed parse client info input: copy_from_user failed\n"); + return -EFAULT; + } + sentry_urma_ctx.kbuf[count] = '\0'; + pr_info("proc_client_info_write kbuf is %s\n", sentry_urma_ctx.kbuf); + + /* + * Parse server EID part and client jetty ID part + * ((39 + 1) * 32 - 1) * 2 + 1 = 2559 + */ + ret = sscanf(sentry_urma_ctx.kbuf, "%2559[^ ] %6[^\n]%n", + sentry_urma_ctx.server_buf_part, + sentry_urma_ctx.client_jetty_id_part, + &n); + if (ret != 2) { + pr_err("Invalid msg str format and parse client info failed! 
str [%s]\n", + sentry_urma_ctx.kbuf); + return -EINVAL; + } + + /* Process server EIDs */ + ret = process_server_eid_str(sentry_urma_ctx.server_buf_part, + server_ub_eid_tmp, server_eid_valid_num); + if (ret) + return ret; + + /* Determine number of configured server EIDs */ + for (i = 0; i < MAX_DIE_NUM; i++) { + if (server_eid_valid_num[i] == 0) + break; + sentry_urma_ctx.server_eid_num_configured = i + 1; + } + + if (sentry_urma_ctx.server_eid_num_configured > + sentry_urma_ctx.local_eid_num_configured) { + pr_err("server eid num %d > local eid num %d\n", + sentry_urma_ctx.server_eid_num_configured, + sentry_urma_ctx.local_eid_num_configured); + return -EINVAL; + } + + /* Process client jetty ID */ + ret = kstrtou32(sentry_urma_ctx.client_jetty_id_part, 10, &client_jetty_id); + if (ret < 0) { + pr_err("Invalid format for client_jetty_id, str %s\n", + sentry_urma_ctx.client_jetty_id_part); + return -EINVAL; + } + + if (client_jetty_id < MIN_JETTY_ID || client_jetty_id > MAX_JETTY_ID) { + pr_err("client_jetty_id %u out of range [%d, %d]\n", + client_jetty_id, MIN_JETTY_ID, MAX_JETTY_ID); + return -EINVAL; + } + pr_info("client_jetty_id is %u\n", client_jetty_id); + + /* Update global configuration */ + sentry_urma_ctx.is_valid_client_info = true; + sentry_urma_ctx.client_jetty_id = client_jetty_id; + + for (i = 0; i < MAX_DIE_NUM; i++) { + memcpy(sentry_urma_dev[i].server_eid, server_ub_eid_tmp[i], + sizeof(union ubcore_eid) * MAX_NODE_NUM); + sentry_urma_dev[i].server_eid_valid_num = server_eid_valid_num[i]; + } + + /* Import URMA resources */ + ret = import(); + if (ret != 0) { + pr_err("ubcore import failed\n"); + return -EINVAL; + } + + return count; +} + +/** + * proc_client_info_show - Read handler for client info proc file + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + * + * This function displays the current client configuration including server EIDs + * and client jetty ID in a human-readable format. + */ +static ssize_t proc_client_info_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + format_client_info_show_str(); + return simple_read_from_buffer(buf, count, ppos, + sentry_urma_ctx.client_info_buf, + strlen(sentry_urma_ctx.client_info_buf)); +} + +static const struct proc_ops proc_client_info_file_operations = { + .proc_read = proc_client_info_show, + .proc_write = proc_client_info_write, +}; + +/** + * proc_heartbeat_write - Write handler for heartbeat control proc file + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + * + * This function controls the heartbeat thread based on user input ("on" or "off"). + * It starts or stops the heartbeat monitoring thread accordingly. 
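+ *
+ * Illustrative usage from user space (the exact path depends on the values
+ * of PROC_DEVICE_PATH and PROC_HEARTBEAT_SWITCH):
+ *
+ *	echo on  > /proc/<PROC_DEVICE_PATH>/<PROC_HEARTBEAT_SWITCH>
+ *	echo off > /proc/<PROC_DEVICE_PATH>/<PROC_HEARTBEAT_SWITCH>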
+ */ +static ssize_t proc_heartbeat_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for /proc/%s/%s, only 'off' or 'on' allowed\n", + PROC_DEVICE_PATH, PROC_HEARTBEAT_SWITCH); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set /proc/%s/%s failed\n", PROC_DEVICE_PATH, PROC_HEARTBEAT_SWITCH); + return -EFAULT; + } + + /* Remove trailing newline if present */ + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + if (!g_is_created_ubcore_resource) { + sentry_urma_ctx.heartbeat_enable = false; + pr_warn("Failed to start heartbeat: local eid not set\n"); + return -EINVAL; + } + + sentry_urma_ctx.hb_thread = kthread_run(heartbeat_thread, NULL, "urma_hb_all"); + if (IS_ERR(sentry_urma_ctx.hb_thread)) { + sentry_urma_ctx.heartbeat_enable = false; + pr_err("failed to start heartbeat thread\n"); + sentry_urma_ctx.hb_thread = NULL; + return -EINVAL; + } + sentry_urma_ctx.heartbeat_enable = true; + pr_info("heartbeat thread enabled\n"); + + } else if (strcmp(enable_str, "off") == 0) { + sentry_urma_ctx.heartbeat_enable = false; + pr_info("heartbeat thread disabled\n"); + + if (sentry_urma_ctx.hb_thread) { + kthread_stop(sentry_urma_ctx.hb_thread); + sentry_urma_ctx.hb_thread = NULL; + } + } else { + pr_err("invalid value for /proc/%s/%s\n", + PROC_DEVICE_PATH, PROC_HEARTBEAT_SWITCH); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_heartbeat_show - Read handler for heartbeat control proc file + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + * + * This function displays the current heartbeat thread status ("on" or "off"). + */ +static ssize_t proc_heartbeat_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + const char *status = sentry_urma_ctx.heartbeat_enable ? "on" : "off"; + size_t len = sentry_urma_ctx.heartbeat_enable ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +static const struct proc_ops proc_heartbeat_file_operations = { + .proc_read = proc_heartbeat_show, + .proc_write = proc_heartbeat_write, +}; + +/** + * heartbeat_thread - Heartbeat monitoring thread function + * @arg: thread argument (unused) + * + * Return: 0 on thread exit + * + * This function implements the heartbeat monitoring mechanism for URMA nodes. + * It periodically sends heartbeat messages, checks for acknowledgments, and + * attempts to rebuild connections to unresponsive nodes. 
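+ *
+ * One cycle, as implemented below: clear the per-node ack flags, send
+ * HEARTBEAT to every remote node, sleep HB_WAIT_ACK_SLEEP_MS, check the ack
+ * flags (they are set by urma_recv() when a HEARTBEAT_ACK arrives), rebuild
+ * and re-probe any node that did not acknowledge, then sleep whatever is
+ * left of HEARTBEAT_INTERVAL_MS before the next cycle.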
+ */ +static int heartbeat_thread(void *arg) +{ + int i, cnt; + int die_index; + + while (!kthread_should_stop()) { + if (!sentry_urma_ctx.heartbeat_enable) { + msleep_interruptible(HB_WAIT_ACK_SLEEP_MS); + continue; + } + + uint64_t start_time = ktime_get_ns(); + + /* Reset heartbeat acknowledgment status for all nodes */ + for (die_index = 0; die_index < MAX_DIE_NUM; die_index++) { + for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) + atomic_set(&sentry_urma_dev[die_index].urma_hb_ack_list[i], 0); + } + pr_info("start to detect heartbeat\n"); + + /* Send heartbeat to inactive nodes */ + for (die_index = 0; die_index < MAX_DIE_NUM; die_index++) { + bool need_rebuild[MAX_NODE_NUM] = {false}; + bool rebuilt = false; + + if (!sentry_urma_dev[die_index].is_created) + break; + + /* sentry_urma_dev[die_index].server_eid_array[0] is local_eid */ + for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) { + pr_info("send heartbeat to node %d (eid=%s)\n", i, + sentry_urma_dev[die_index].server_eid_array[i]); + sentry_post_jetty_send_wr(HEARTBEAT, strlen(HEARTBEAT) + 1, i, die_index); + } + + msleep_interruptible(HB_WAIT_ACK_SLEEP_MS); + + /* Check for heartbeat acknowledgments */ + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + continue; + + memset(sentry_urma_ctx.heartbeat_thread_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM); + cnt = sentry_poll_jfc(sentry_urma_dev[die_index].sender_jfc, + MAX_NODE_NUM, sentry_urma_ctx.heartbeat_thread_cr, die_index); + urma_mutex_lock_op(URMA_UNLOCK); + + if (cnt > 0) { + for (int k = 0; k < cnt; k++) + pr_info("heartbeat cr[%d].status=%d\n", k, sentry_urma_ctx.heartbeat_thread_cr[k].status); + } + + /* Check final heartbeat result and rebuild if needed */ + for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) { + if (!atomic_read(&sentry_urma_dev[die_index].urma_hb_ack_list[i])) { + /* Link down, try to rebuild link */ + pr_info("Failed to detect heartbeat of node %d (eid=%s), start rebuild link\n", + i, sentry_urma_dev[die_index].server_eid_array[i]); + if (rebuild_tjetty(i, die_index) == 0) { + pr_info("after rebuild, retry heartbeat for node %d (eid=%s)\n", + i, sentry_urma_dev[die_index].server_eid_array[i]); + sentry_post_jetty_send_wr(HEARTBEAT, strlen(HEARTBEAT) + 1, + i, die_index); + need_rebuild[i] = true; + rebuilt = true; + } + } else { + pr_info("succeed to detect heartbeat of node %d (eid=%s)\n", + i, sentry_urma_dev[die_index].server_eid_array[i]); + } + } + + /* Verify rebuilt connections */ + if (rebuilt) { + msleep_interruptible(HB_WAIT_ACK_SLEEP_MS); + memset(sentry_urma_ctx.heartbeat_thread_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM); + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + continue; + + sentry_poll_jfc(sentry_urma_dev[die_index].sender_jfc, + MAX_NODE_NUM, sentry_urma_ctx.heartbeat_thread_cr, die_index); + urma_mutex_lock_op(URMA_UNLOCK); + + pr_info("check rebuilt node heartbeat\n"); + for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) { + if (!need_rebuild[i]) + continue; + + pr_info("node[%s] heartbeat recover %s\n", + sentry_urma_dev[die_index].server_eid_array[i], + !atomic_read(&sentry_urma_dev[die_index].urma_hb_ack_list[i]) ? 
+ "failed" : "success"); + } + } + } + + /* Calculate sleep time to maintain heartbeat interval */ + int msleep_time = HEARTBEAT_INTERVAL_MS - + (int)((ktime_get_ns() - start_time) / NSEC_PER_MSEC); + + if (msleep_time > 0) + msleep_interruptible(msleep_time); + } + + return 0; +} + +/** + * sentry_poll_jfc - Poll completion queue for heartbeat acknowledgments + * @jfc: Jetty completion queue to poll + * @cr_cnt: Maximum number of completions to retrieve + * @cr: Array to store completions + * @die_index: Die index for resource access + * + * Return: Number of completions retrieved, negative on error + * + * This function polls the completion queue for heartbeat acknowledgments + * and updates the remote receive counters for successful completions. + */ +static int sentry_poll_jfc(struct ubcore_jfc *jfc, int cr_cnt, struct ubcore_cr *cr, + int die_index) +{ + int cnt; + int k; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + cnt = ubcore_poll_jfc(jfc, cr_cnt, cr); + if (cnt <= 0) + return cnt; + + /* Process successful completions */ + for (k = 0; k < cnt; k++) { + int idx = -1; + int tmp_die_index = die_index; + + if (cr[k].status == 0) { + match_index_by_remote_ub_eid(cr[k].remote_id.eid, &idx, &tmp_die_index); + if (idx >= 0) + atomic_inc(&sentry_urma_dev[tmp_die_index].remote_recv_cnt[idx]); + } + } + + return cnt; +} + +/** + * update_remote_recv_cnt - Update remote receive counters by polling completion queue + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure + * + * This function polls the sender completion queue to update the remote + * receive counters for the specified die index. + */ +static int update_remote_recv_cnt(int die_index) +{ + int cnt; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + if (!sentry_urma_ctx.is_panic_mode && !mutex_trylock(&sentry_urma_mutex)) + return -EBUSY; + + memset(sentry_urma_ctx.update_recv_cnt_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM); + cnt = sentry_poll_jfc(sentry_urma_dev[die_index].sender_jfc, MAX_NODE_NUM, sentry_urma_ctx.update_recv_cnt_cr, die_index); + urma_mutex_lock_op(URMA_UNLOCK); + + if (cnt < 0) { + pr_err("update_remote_recv_cnt: poll sender_jfc error, ret %d\n", cnt); + return -EFAULT; + } + + return 0; +} + +/** + * rebuild_tjetty - Rebuild a target jetty for a specific node + * @idx: Node index to rebuild + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure + * + * This function rebuilds a target jetty for a specific node when connectivity + * issues are detected. It creates a new tjetty, replaces the old one, and + * resets the send/receive counters. 
+ */ +static int rebuild_tjetty(int idx, int die_index) +{ + struct ubcore_tjetty *tjetty_tmp = NULL; + struct ubcore_tjetty *tjetty_to_clear = NULL; + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("%s failed: urma %d dev is not exist\n", __func__, die_index); + return -EINVAL; + } + + struct ubcore_tjetty_cfg cfg = { + .id.id = sentry_urma_ctx.client_jetty_id, + .id.eid = sentry_urma_dev[die_index].server_eid[idx], + .trans_mode = UBCORE_TP_RM, + .type = UBCORE_JETTY, + }; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) { + pr_debug("rebuild_tjetty: lock busy, skipping node %d, eid %s\n", + idx, sentry_urma_dev[die_index].server_eid_array[idx]); + return -EBUSY; + } + + tjetty_tmp = create_tjetty(&cfg, idx, die_index); + if (IS_ERR_OR_NULL(tjetty_tmp)) { + urma_mutex_lock_op(URMA_UNLOCK); + pr_err("rebuild_tjetty: tjetty[%d] ubcore_import_jetty_ex err, eid %s\n", + idx, sentry_urma_dev[die_index].server_eid_array[idx]); + return -EFAULT; + } + + /* Replace old tjetty if it exists */ + if (sentry_urma_dev[die_index].tjetty[idx]) + tjetty_to_clear = sentry_urma_dev[die_index].tjetty[idx]; + + sentry_urma_dev[die_index].tjetty[idx] = tjetty_tmp; + + /* Reset counters */ + atomic_set(&sentry_urma_dev[die_index].send_cnt[idx], 0); + atomic_set(&sentry_urma_dev[die_index].remote_recv_cnt[idx], 0); + + /* Clean up old tjetty */ + if (tjetty_to_clear) + ubcore_unimport_jetty(tjetty_to_clear); + + /* Repost receive work request */ + sentry_post_recv(sentry_urma_dev[die_index].jetty, + sentry_urma_dev[die_index].r_seg, idx, die_index); + + urma_mutex_lock_op(URMA_UNLOCK); + pr_info("rebuild_tjetty: tjetty[%d] rebuilt OK\n", idx); + return 0; +} + +/** + * check_and_rebuild_single_tjetty - Check and rebuild tjetty if needed + * @idx: Node index to check + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure or if rebuild not needed + * + * This function checks the send and receive counters for a specific node and + * rebuilds the tjetty if the difference exceeds the rebuild threshold. + * It also handles counter overflow by resetting when they reach maximum values. 
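+ *
+ * Worked example with URMA_REBUILD_THRESHOLD == 3: if send_cnt is 5 and
+ * remote_recv_cnt is 1, the gap of 4 exceeds the threshold, so both counters
+ * are reset and rebuild_tjetty() is called for that node.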
+ */ +static int check_and_rebuild_single_tjetty(int idx, int die_index) +{ + int ret = 0; + int scnt, rcnt; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + scnt = atomic_read(&sentry_urma_dev[die_index].send_cnt[idx]); + rcnt = atomic_read(&sentry_urma_dev[die_index].remote_recv_cnt[idx]); + + /* Check if rebuild threshold is exceeded */ + if (scnt - rcnt > URMA_REBUILD_THRESHOLD) { + pr_info("tjetty[%d] %s check failed: send_cnt=%d, remote_recv_cnt=%d, rebuild\n", + idx, sentry_urma_dev[die_index].server_eid_array[idx], scnt, rcnt); + /* Reset counters and rebuild */ + atomic_set(&sentry_urma_dev[die_index].send_cnt[idx], 0); + atomic_set(&sentry_urma_dev[die_index].remote_recv_cnt[idx], 0); + ret = rebuild_tjetty(idx, die_index); + } + + /* Handle counter overflow */ + if (scnt > URMA_CNT_MAX_NUM && rcnt > URMA_CNT_MAX_NUM) { + atomic_set(&sentry_urma_dev[die_index].send_cnt[idx], 0); + atomic_set(&sentry_urma_dev[die_index].remote_recv_cnt[idx], 0); + } + + return ret; +} + +/** + * sentry_post_jetty_send_wr - Post a send work request to a jetty + * @buf: Data buffer to send + * @len: Length of data to send + * @tjetty_idx: Target jetty index + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure + * + * This function posts a send work request to the specified target jetty, + * copying the data to the send segment and updating the send counters. + */ +static int sentry_post_jetty_send_wr(const char *buf, size_t len, int tjetty_idx, + int die_index) +{ + int ret; + struct ubcore_jfs_wr *bad_wr = NULL; + struct ubcore_tjetty *tj_i; + uint64_t s_seg_va_i; + size_t actual_len; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) { + pr_debug("sentry_post_jetty_send_wr: lock busy, skipping %d\n", tjetty_idx); + return 0; + } + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("%s failed: urma %d dev is not exist\n", __func__, die_index); + urma_mutex_lock_op(URMA_UNLOCK); + return -EINVAL; + } + + tj_i = sentry_urma_dev[die_index].tjetty[tjetty_idx]; + + if (!sentry_urma_dev[die_index].jetty) { + pr_err("jetty not created! 
Please establish a link first\n"); + urma_mutex_lock_op(URMA_UNLOCK); + return COMM_PARM_NOT_SET; + } + + if (!tj_i) { + urma_mutex_lock_op(URMA_UNLOCK); + return -ENODEV; + } + + /* Configure send work request */ + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].opcode = UBCORE_OPC_SEND; + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].tjetty = tj_i; + s_seg_va_i = (uint64_t)sentry_urma_dev[die_index].s_seg_va + + (SGE_MAX_LEN * tjetty_idx); + + /* Copy data to send segment */ + ret = snprintf((char *)s_seg_va_i, len, "%s", buf); + if ((size_t)ret >= len) { + pr_err("sentry_post_jetty_send_wr: send str size exceeds max\n"); + urma_mutex_lock_op(URMA_UNLOCK); + return -EINVAL; + } + + /* Set up scatter-gather element */ + actual_len = strnlen((char *)s_seg_va_i, len - 1) + 1; + sentry_urma_dev[die_index].s_sge[tjetty_idx].addr = s_seg_va_i; + sentry_urma_dev[die_index].s_sge[tjetty_idx].len = actual_len; + sentry_urma_dev[die_index].s_sge[tjetty_idx].tseg = + sentry_urma_dev[die_index].s_seg; + + /* Configure work request */ + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].send.src.sge = + &sentry_urma_dev[die_index].s_sge[tjetty_idx]; + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].send.src.num_sge = 1; + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].user_ctx = s_seg_va_i; + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].flag.bs.complete_enable = 1; + + /* Post send work request */ + ret = ubcore_post_jetty_send_wr(sentry_urma_dev[die_index].jetty, + &sentry_urma_dev[die_index].jfs_wr[tjetty_idx], + &bad_wr); + if (ret) { + pr_err("ubcore_post_jetty_send_wr err\n"); + } else { + atomic_inc(&sentry_urma_dev[die_index].send_cnt[tjetty_idx]); + pr_info("ubcore_post_jetty_send_wr success\n"); + } + + urma_mutex_lock_op(URMA_UNLOCK); + return ret; +} + +/** + * urma_send_to_all_nodes - Send data to all configured nodes + * @buf: Data buffer to send + * @len: Length of data to send + * @die_index: Die index for resource access + * + * Return: Number of successful sends, negative error code on failure + * + * This function sends data to all configured remote nodes for a specific die, + * performing necessary checks and potential tjetty rebuilds before sending. 
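+ *
+ * Index 0 of server_eid[] is this die's local EID, so the loop below starts
+ * at 1. On success the return value is the number of remote nodes the
+ * message was posted to; COMM_PARM_NOT_SET is propagated when the local
+ * jetty has not been created yet.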
+ */ +static int urma_send_to_all_nodes(const char *buf, size_t len, int die_index) +{ + int cnt = 0; + int i; + + if (!buf || len == 0) + return -EINVAL; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + /* Update remote receive counters */ + if (update_remote_recv_cnt(die_index)) + return -EFAULT; + + /* sentry_urma_dev[die_index].server_eid[0] is local_eid */ + for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) { + int ret = 0; + + /* Check and rebuild tjetty if needed (skip in panic mode) */ + if (!sentry_urma_ctx.is_panic_mode) + ret = check_and_rebuild_single_tjetty(i, die_index); + + if (!ret) { + pr_info("start to send msg [%s] to [%s]\n", buf, + sentry_urma_dev[die_index].server_eid_array[i]); + ret = sentry_post_jetty_send_wr(buf, len, i, die_index); + } + + if (ret == COMM_PARM_NOT_SET) + return COMM_PARM_NOT_SET; + + if (ret == 0) + cnt++; + } + + return cnt; +} + +/** + * urma_send_to_given_node - Send data to a specific node + * @buf: Data buffer to send + * @len: Length of data to send + * @dst_eid: Destination EID string + * @die_index: Die index for resource access (-1 if unknown) + * + * Return: 1 on successful send, 0 if not sent, negative error code on failure + * + * This function sends data to a specific node identified by EID, performing + * necessary validation and potential tjetty rebuild before sending. + */ +static int urma_send_to_given_node(const char *buf, size_t len, + const char *dst_eid, int die_index) +{ + int cnt = 0; + int ret; + int node_idx = -1; + union ubcore_eid dst_ubcore_eid; + + if (!buf || len == 0 || !dst_eid) + return -EINVAL; + + /* Convert EID string to binary format */ + if (str_to_eid(dst_eid, &dst_ubcore_eid) < 0) { + pr_err("urma_send: invalid dst eid [%s]\n", dst_eid); + return -EINVAL; + } + + /* Find node and die indices */ + match_index_by_remote_ub_eid(dst_ubcore_eid, &node_idx, &die_index); + if (node_idx < 0) { + pr_warn("urma_send: msg format invalid, str [%s]\n", buf); + return 0; + } + + /* Update remote receive counters */ + ret = update_remote_recv_cnt(die_index); + if (ret) + return ret; + + /* Check and rebuild tjetty if needed (skip in panic mode) */ + if (!sentry_urma_ctx.is_panic_mode) + ret = check_and_rebuild_single_tjetty(node_idx, die_index); + + if (!ret) { + pr_info("start to send msg [%s] to [%s]\n", buf, dst_eid); + ret = sentry_post_jetty_send_wr(buf, len, node_idx, die_index); + } + + if (!ret) + cnt++; + + return cnt; +} + +/** + * urma_send - Send data to URMA nodes + * @buf: Data buffer to send + * @len: Length of data to send + * @dst_eid: Destination EID (NULL for broadcast to all nodes) + * @die_index: Die index (-1 for auto-detect, 0/1 for specific die) + * + * Return: Number of successful sends, negative error code on failure + * + * This function provides the main interface for sending data via URMA, + * supporting both broadcast and unicast modes. 
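+ *
+ * Illustrative usage of this exported interface (msg is a hypothetical,
+ * NUL-terminated string buffer):
+ *
+ *	broadcast to every remote node of die 0:
+ *		ret = urma_send(msg, strlen(msg) + 1, NULL, 0);
+ *	unicast to a single node, auto-detecting the die:
+ *		ret = urma_send(msg, strlen(msg) + 1, dst_eid_str, -1);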
+ */ +int urma_send(const char *buf, size_t len, const char *dst_eid, int die_index) +{ + int cnt = 0; + + if (!g_is_created_ubcore_resource) + return -ENODEV; + + if (!dst_eid && die_index >= 0) { + /* Broadcast mode: send to all nodes */ + cnt = urma_send_to_all_nodes(buf, len, die_index); + } else { + /* Unicast mode: send to specific node */ + cnt = urma_send_to_given_node(buf, len, dst_eid, die_index); + } + + return cnt; +} +EXPORT_SYMBOL(urma_send); + +/** + * urma_recv - Receive data from URMA nodes + * @buf_arr: Array of buffers to store received messages + * @len: Maximum length for each received message + * + * Return: Number of valid messages received, negative error code on failure + * + * This function polls for incoming messages, handles heartbeat protocol, + * and returns valid event messages to the caller. + */ +int urma_recv(char **buf_arr, size_t len) +{ + int ret; + int valid_msg_num = 0; + char recv_msg[URMA_SEND_DATA_MAX_LEN] = {0}; + int die_index; + + if (!buf_arr) + return -EINVAL; + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + return -EBUSY; + + if (!g_is_created_ubcore_resource) { + urma_mutex_lock_op(URMA_UNLOCK); + return -ENODEV; + } + urma_mutex_lock_op(URMA_UNLOCK); + + /* Check each die for incoming messages */ + for (die_index = 0; die_index < sentry_urma_ctx.local_eid_num_configured; die_index++) { + int cnt; + + memset(sentry_urma_ctx.urma_recv_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM); + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + continue; + + if (!sentry_urma_dev[die_index].is_created) { + urma_mutex_lock_op(URMA_UNLOCK); + break; + } + + cnt = ubcore_poll_jfc(sentry_urma_dev[die_index].receiver_jfc, + MAX_NODE_NUM, sentry_urma_ctx.urma_recv_cr); + urma_mutex_lock_op(URMA_UNLOCK); + + if (cnt < 0) { + pr_err("urma_recv: ubcore_poll_jfc failed for eid %s, ret %d\n", + sentry_urma_dev[die_index].server_eid_array[0], cnt); + continue; + } else if (cnt == 0) { + /* No messages available */ + continue; + } + + /* Process each completion */ + for (int i = 0; i < cnt; i++) { + int node_idx = -1; + int tmp_die_index = die_index; + + /* Extract message from completion context */ + ret = snprintf(recv_msg, len, "%s", (char *)sentry_urma_ctx.urma_recv_cr[i].user_ctx); + if ((size_t)ret >= len) { + pr_warn("urma recv: msg size exceeds max len %lu\n", len); + continue; + } + + /* Match remote EID to node index */ + match_index_by_remote_ub_eid(sentry_urma_ctx.urma_recv_cr[i].remote_id.eid, &node_idx, &tmp_die_index); + if (node_idx < 0) { + pr_warn("urma_recv: cr[%d] eid (%llx, %x, %x) not matched, msg [%s]\n", + i, sentry_urma_ctx.urma_recv_cr[i].remote_id.eid.in4.reserved, + sentry_urma_ctx.urma_recv_cr[i].remote_id.eid.in4.prefix, + sentry_urma_ctx.urma_recv_cr[i].remote_id.eid.in4.addr, recv_msg); + continue; + } + + pr_info("urma_recv: cr[%d] get msg [%s] from node[%d] eid=%s\n", + i, recv_msg, node_idx, + sentry_urma_dev[tmp_die_index].server_eid_array[node_idx]); + + /* Handle different message types */ + if (!strcmp(recv_msg, HEARTBEAT)) { + /* Heartbeat request - send acknowledgment */ + pr_info("urma_recv: received heartbeat from node[%d] eid=%s, send ack\n", + node_idx, sentry_urma_dev[tmp_die_index].server_eid_array[node_idx]); + sentry_post_jetty_send_wr(HEARTBEAT_ACK, strlen(HEARTBEAT_ACK) + 1, + node_idx, tmp_die_index); + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + continue; + + memset(sentry_urma_ctx.urma_recv_sender_cr, 0, 
sizeof(struct ubcore_cr) * MAX_NODE_NUM); + sentry_poll_jfc(sentry_urma_dev[tmp_die_index].sender_jfc, + MAX_NODE_NUM, sentry_urma_ctx.urma_recv_sender_cr, tmp_die_index); + urma_mutex_lock_op(URMA_UNLOCK); + } else if (!strcmp(recv_msg, HEARTBEAT_ACK)) { + /* Heartbeat acknowledgment - update status */ + pr_info("urma_recv: received heartbeat ack from node[%d] eid=%s\n", + node_idx, sentry_urma_dev[tmp_die_index].server_eid_array[node_idx]); + atomic_set(&sentry_urma_dev[tmp_die_index].urma_hb_ack_list[node_idx], 1); + } else { + /* Event message - store for caller */ + memcpy(buf_arr[valid_msg_num], recv_msg, sizeof(recv_msg)); + valid_msg_num++; + } + + /* Repost receive work request */ + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + continue; + + ret = sentry_post_recv(sentry_urma_dev[tmp_die_index].jetty, + sentry_urma_dev[tmp_die_index].r_seg, + node_idx, tmp_die_index); + urma_mutex_lock_op(URMA_UNLOCK); + + if (ret < 0) + pr_warn("urma_recv: sentry_post_recv failed, ret %d\n", ret); + } + } + + return valid_msg_num; +} +EXPORT_SYMBOL(urma_recv); + +/** + * reboot_cleanup_notifier - System reboot notifier callback + * @nb: Notifier block + * @action: Reboot action + * @data: Notifier data + * + * Return: NOTIFY_DONE + * + * This function ensures proper cleanup of URMA resources during system reboot. + */ +static int reboot_cleanup_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + if (action == SYS_RESTART && sentry_urma_ctx.hb_thread) { + kthread_stop(sentry_urma_ctx.hb_thread); + sentry_urma_ctx.hb_thread = NULL; + pr_info("urma_hb_all thread stopped\n"); + } + return NOTIFY_DONE; +} + +static struct notifier_block reboot_cleanup_nb = { + .notifier_call = reboot_cleanup_notifier, + .priority = INT_MAX, +}; + +/** + * sentry_urma_comm_init - Module initialization function + * + * Return: 0 on success, negative error code on failure + * + * This function initializes the URMA communication module, creating proc + * files, allocating buffers, and registering reboot notifier. + */ +static int __init sentry_urma_comm_init(void) +{ + int ret = 0; + + sentry_urma_ctx.proc_dir = proc_mkdir_mode(PROC_DEVICE_PATH, 0550, NULL); + if (!sentry_urma_ctx.proc_dir) { + pr_err("create /proc/%s dir failed\n", PROC_DEVICE_PATH); + return -ENOMEM; + } + + ret |= sentry_create_proc_file(PROC_DEVICE_NAME, sentry_urma_ctx.proc_dir, + &proc_client_info_file_operations); + ret |= sentry_create_proc_file(PROC_HEARTBEAT_SWITCH, sentry_urma_ctx.proc_dir, + &proc_heartbeat_file_operations); + if (ret < 0) + goto remove_proc_dir; + + ret = init_global_char(); + if (ret) + goto remove_proc_dir; + + ret = register_reboot_notifier(&reboot_cleanup_nb); + if (ret) { + pr_info("reboot_cleanup_nb register failed: %d\n", ret); + goto free_mem; + } + + pr_info("reboot_cleanup_nb registered\n"); + return 0; + +free_mem: + free_global_char(); +remove_proc_dir: + proc_remove(sentry_urma_ctx.proc_dir); + return ret; +} + +/** + * sentry_urma_comm_exit - Module cleanup function + * + * This function cleans up all URMA resources, stops threads, and removes + * proc files during module unload. 
+ */ +static void __exit sentry_urma_comm_exit(void) +{ + unregister_reboot_notifier(&reboot_cleanup_nb); + pr_info("reboot_cleanup_nb unregistered\n"); + + if (sentry_urma_ctx.hb_thread) { + kthread_stop(sentry_urma_ctx.hb_thread); + sentry_urma_ctx.hb_thread = NULL; + pr_info("urma_hb_all thread stopped\n"); + } + + release_all_resource(); + + if (sentry_urma_ctx.proc_dir) + proc_remove(sentry_urma_ctx.proc_dir); + + pr_info("ubcore release\n"); + free_global_char(); +} + +module_init(sentry_urma_comm_init); +module_exit(sentry_urma_comm_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("luckky"); +MODULE_DESCRIPTION("Kernel module to transport msg via URMA"); diff --git a/drivers/ub/sentry/sentry_uvb_comm.c b/drivers/ub/sentry/sentry_uvb_comm.c new file mode 100644 index 0000000000000000000000000000000000000000..e7fef72e0670ae3f8654efec44af67740d603d0a --- /dev/null +++ b/drivers/ub/sentry/sentry_uvb_comm.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description: support UVB communication + * Author: sxt1001 + * Create: 2025-04-23 + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "smh_common_type.h" +#include "smh_message.h" + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][uvb]: " fmt + +uint32_t g_local_cna = -1; +EXPORT_SYMBOL(g_local_cna); + +static struct proc_dir_entry *uvb_proc_dir; +static char *g_kbuf_server_cna; // cna1;cna2;cna3...cnan +uint32_t g_server_cna_array[MAX_NODE_NUM]; +int g_server_cna_valid_num; +EXPORT_SYMBOL(g_server_cna_array); +EXPORT_SYMBOL(g_server_cna_valid_num); + +/* + * @brief send data to server by UVB + * + * @param1: Data to be sent + * @param2: Indicates the CNA information of the specified server. + * If dst_cna is greater than CNA_MAX_VALUE, no server is + * specified. In this case, data needs to be sent to all nodes. + * @param3: UVB mode. If env is in panic status, We need to use + * synchronization mode, set is_sync to true. + * @return Number of nodes that are successfully sent + * */ +int uvb_send(const char *str, uint32_t dst_cna, bool is_sync) +{ + int res, cnt = 0; + + struct cis_message msg; + + msg.input = (char *)str; + msg.input_size = strlen(str) + 1; + msg.output = NULL; + msg.p_output_size = NULL; + + if (dst_cna < CNA_MAX_VALUE) { // dst cna is valid, send data to specific node + res = cis_call_by_uvb(UBIOS_CALL_ID_PANIC_CALL, UVB_SENDER_ID_SYSSENTRY, + UVB_RECEIVER_ID_SYSSENTRY(dst_cna), &msg, is_sync); + if (res != 0) { + pr_err("Send to a specified node, cis_call_by_uvb send data %s to %u failed.\n", str, dst_cna); + return -1; + } + cnt++; + pr_info("Send to a specified node, cis_call_by_uvb send data %s to %u success.\n", str, dst_cna); + return cnt; + } + + // dst_cna is invalid, send data to all nodes. + for (int i = 0; i < g_server_cna_valid_num; i++) { + if (g_server_cna_array[i] < CNA_MAX_VALUE) { + pr_info("Broadcast mode. 
receiver cna is %d, received id is %#x.\n", g_server_cna_array[i], + UVB_RECEIVER_ID_SYSSENTRY(g_server_cna_array[i])); + res = cis_call_by_uvb(UBIOS_CALL_ID_PANIC_CALL, UVB_SENDER_ID_SYSSENTRY, + UVB_RECEIVER_ID_SYSSENTRY(g_server_cna_array[i]), &msg, is_sync); + if (res != 0) { + pr_err("cis_call_by_uvb send data %s to %u failed.\n", str, g_server_cna_array[i]); + continue; + } + pr_info("cis_call_by_uvb send data %s to %u success.\n", str, g_server_cna_array[i]); + cnt++; + } + } + return cnt; +} +EXPORT_SYMBOL(uvb_send); + +static int convert_server_cna_str_to_u32_array(const char *server_cna) +{ + int server_cna_valid_num = 0, ret = 0; + uint32_t server_cna_array[MAX_NODE_NUM]; + char *token; + + char *server_cna_copy = kstrdup(server_cna, GFP_KERNEL); + char *rest = server_cna_copy; + + while ((token = strsep(&rest, ";"))) { + if (server_cna_valid_num >= MAX_NODE_NUM) { + pr_err("Invalid format for server_cna: cna max num is %d, the current input server_cna exceeds %d nodes.\n", MAX_NODE_NUM, MAX_NODE_NUM); + kfree(server_cna_copy); + return -EINVAL; + } + if (*token != '\0') { + ret = kstrtou32(token, 10, &server_cna_array[server_cna_valid_num]); + if (ret < 0) { + pr_err("Invalid format for server cna, str is %s\n", token); + kfree(server_cna_copy); + return -EINVAL; + } + if (server_cna_array[server_cna_valid_num] > CNA_MAX_VALUE) { + pr_err("Found invalid cna (%s), it should not be greater than %d\n", token, CNA_MAX_VALUE); + kfree(server_cna_copy); + return -EINVAL; + } + ++server_cna_valid_num; + } + } + pr_info("server cna num is %d\n", server_cna_valid_num); + + kfree(server_cna_copy); + + // input server_cna is valid, start to update global variables such as g_server_cna_valid_num and g_server_cna_array + g_server_cna_valid_num = server_cna_valid_num; + for (int i = 0; i < g_server_cna_valid_num; i++) { + g_server_cna_array[i] = server_cna_array[i]; + } + return 0; +} + +static ssize_t proc_uvb_server_cna_show(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, g_kbuf_server_cna, strlen(g_kbuf_server_cna)); +} + +static ssize_t proc_uvb_server_cna_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int ret = 0; + char server_cna_buf[(MAX_NODE_NUM + 1) * INTEGER_TO_STR_MAX_LEN]; + + if (count > (MAX_NODE_NUM + 1) * INTEGER_TO_STR_MAX_LEN - 1) { + pr_err("invalid value for server_cna mode.\n"); + return -EINVAL; + } + if (copy_from_user(server_cna_buf, user_buf, count)) { + pr_err("failed parse client info input: copy_from_user failed.\n"); + return -EFAULT; + } + server_cna_buf[count] = '\0'; + pr_info("proc_uvb_server_cna_write server_cna is %s\n", server_cna_buf); + + ret = convert_server_cna_str_to_u32_array(server_cna_buf); + if (ret) { + pr_err("convert_server_cna_str_to_u32_array failed\n"); + return -EINVAL; + } + snprintf(g_kbuf_server_cna, (MAX_NODE_NUM + 1) * INTEGER_TO_STR_MAX_LEN, "%s", server_cna_buf); + return count; +} + +static const struct proc_ops proc_uvb_server_cna_file_operations = { + .proc_read = proc_uvb_server_cna_show, + .proc_write = proc_uvb_server_cna_write, +}; + +static int __init uvb_comm_init(void) +{ + int ret = 0; + + for (int i = 0; i < MAX_NODE_NUM; i++) { + g_server_cna_array[i] = (uint32_t)-1; + } + + uvb_proc_dir = proc_mkdir_mode("sentry_uvb_comm", PROC_DIR_PERMISSION, NULL); + if (!uvb_proc_dir) { + pr_err("create /proc/sentry_uvb_comm dir failed.\n"); + return -ENOMEM; + } + + ret = sentry_create_proc_file("server_cna", 
uvb_proc_dir, &proc_uvb_server_cna_file_operations); + if (ret == -ENOMEM) { + goto remove_uvb_proc_dir; + } + + g_kbuf_server_cna = kzalloc((MAX_NODE_NUM + 1) * INTEGER_TO_STR_MAX_LEN, GFP_KERNEL); + if (!g_kbuf_server_cna) { + pr_err("kzalloc g_kbuf_server_cna failed!\n"); + ret = -ENOMEM; + goto remove_uvb_proc_dir; + } + pr_info("uvb communication is enabled.\n"); + return 0; + +remove_uvb_proc_dir: + proc_remove(uvb_proc_dir); + return ret; +} + +static void __exit uvb_comm_exit(void) +{ + if (uvb_proc_dir) { + proc_remove(uvb_proc_dir); + } + if (g_kbuf_server_cna) { + kfree(g_kbuf_server_cna); + g_kbuf_server_cna = NULL; + } + pr_info("uvb communication module unloaded\n"); +} + +module_init(uvb_comm_init); +module_exit(uvb_comm_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("sxt1001"); +MODULE_DESCRIPTION("Kernel module to send msg via UVB"); diff --git a/drivers/ub/sentry/smh_common_type.h b/drivers/ub/sentry/smh_common_type.h new file mode 100644 index 0000000000000000000000000000000000000000..069c04a23f0876a1081ae3a12de2dd4caf7bb33c --- /dev/null +++ b/drivers/ub/sentry/smh_common_type.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description: Common Header File for Sentry Module + * Author: Luckky + * Create: 2025-02-17 + */ + +#ifndef SMH_COMMON_TYPE_H +#define SMH_COMMON_TYPE_H + +#include +#include +#include +#include +#include +#include + +#define SMH_TYPE ('}') +#define MAX_DIE_NUM 2 +#define OOM_EVENT_MAX_NUMA_NODES 8 +#define MAX_NODE_NUM 32 +#define EID_MAX_LEN 40 // eid str len 39 + '\0' +#define REPORT_COMM_TIME 5000 +#define URMA_SEND_DATA_MAX_LEN (2 + EID_MAX_LEN * 2 + 10 + 1 + 4) // type_cna_eid_randomID_res +#define MILLISECONDS_OF_EACH_MDELAY 1000 +#define CNA_MAX_VALUE 0xffffff +#define INTEGER_TO_STR_MAX_LEN 22 +#define COMM_PARM_NOT_SET (-2) +#define HEARTBEAT "heartbeat" +#define HEARTBEAT_ACK "heartbeat_ack" +#define ENABLE_VALUE_MAX_LEN 4 // 'off' + '\0' + +#define URMA_REBUILD_THRESHOLD 3 +#define URMA_ACK_RETRY_NUM 10 + +#define PROC_FILE_PERMISSION 0600 +#define PROC_DIR_PERMISSION 0550 + +enum { + SMH_CMD_MSG_ACK = 0x10, +}; + +#define SMH_MSG_ACK _IO(SMH_TYPE, SMH_CMD_MSG_ACK) + +enum sentry_msg_helper_msg_type { + SMH_MESSAGE_POWER_OFF, + SMH_MESSAGE_OOM, + SMH_MESSAGE_PANIC, + SMH_MESSAGE_KERNEL_REBOOT, + SMH_MESSAGE_UB_MEM_ERR, + SMH_MESSAGE_PANIC_ACK, + SMH_MESSAGE_KERNEL_REBOOT_ACK, + SMH_MESSAGE_UNKNOWN, +}; + +struct sentry_msg_helper_msg { + enum sentry_msg_helper_msg_type type; + uint64_t msgid; + uint64_t start_send_time; + uint64_t timeout_time; + // reboot_info is empty + union { + struct { + int nr_nid; + int nid[OOM_EVENT_MAX_NUMA_NODES]; + int sync; + int timeout; + int reason; + } oom_info; + struct { + uint32_t cna; + char eid[EID_MAX_LEN]; + } remote_info; + struct { + uint64_t pa; + int mem_type; + int fault_with_kill; + enum ras_err_type raw_ubus_mem_err_type; + } ub_mem_info; + } helper_msg_info; + unsigned long res; +}; + +// urma communication interface +extern int urma_send(const char *buf, size_t len, const char *dst_eid, int die_index); +extern int urma_recv(char **buf_arr, size_t len); + +// UVB communication interface +extern int uvb_send(const char *str, uint32_t dst_cna, bool is_sync); + +extern uint32_t g_local_cna; +#define UVB_SENDER_ID_SYSSENTRY_INDEX (g_local_cna) +#define UVB_SENDER_ID_SYSSENTRY (UBIOS_USER_ID_RICH_OS | UVB_SENDER_ID_SYSSENTRY_INDEX) +#define UVB_RECEIVER_ID_SYSSENTRY(cna) (UBIOS_USER_ID_UB_DEVICE | 
(cna)) + +/* + * str format type_cna_eid or type_cna_eid_res. type_cna_eid_res is ack msg. + * */ +static inline int convert_str_to_smh_msg(const char *str, + struct sentry_msg_helper_msg *smh_msg, + uint32_t *random_id) +{ + int n; + int ret = 0; + char input_copy[URMA_SEND_DATA_MAX_LEN]; + + n = sscanf(str, "%d_%s", (int *)&smh_msg->type, input_copy); + if (n != 2) { + pr_warn("Invalid msg str format and parse type failed! str is [%s].\n", str); + return -EINVAL; + } + + switch (smh_msg->type) { + case SMH_MESSAGE_PANIC: + case SMH_MESSAGE_KERNEL_REBOOT: + // eid length is EID_MAX_LEN - 1 + if (!(sscanf(input_copy, "%u_%39[^_]_%llu_%u%n", + &smh_msg->helper_msg_info.remote_info.cna, + smh_msg->helper_msg_info.remote_info.eid, + &smh_msg->timeout_time, + random_id, + &n) == 4) || strlen(input_copy) != n) { + pr_warn("Invalid msg str format and parse cna/eid failed! str is [%s].\n", str); + ret = -1; + } + break; + case SMH_MESSAGE_PANIC_ACK: + case SMH_MESSAGE_KERNEL_REBOOT_ACK: + if (!(sscanf(input_copy, "%u_%39[^_]_%lu%n", + &smh_msg->helper_msg_info.remote_info.cna, + smh_msg->helper_msg_info.remote_info.eid, + &smh_msg->res, + &n) == 3) || strlen(input_copy) != n) { + pr_warn("Invalid msg str format and parse cna/eid failed! str is [%s].\n", str); + ret = -1; + } + break; + default: + pr_warn("Invalid event type!\n"); + ret = -1; + } + return ret; +} + +static inline void free_char_array(char **array_ptr, int array_len) +{ + if (array_ptr) { + for (int i = 0; i < array_len; i++) { + if (array_ptr[i]) { + kfree(array_ptr[i]); + array_ptr[i] = NULL; + } + } + kfree(array_ptr); + array_ptr = NULL; + } +} + +/* + * Return 1 when buf is valid ipv4 format, return 0 when buf is invalid ipv4 format + * or any error occurs. + * +*/ +static inline int is_valid_ipv4(const char *buf) +{ + int ret; + __be32 addr; + + if (buf == NULL) { + return 0; + } + + ret = in4_pton(buf, strnlen(buf, EID_MAX_LEN), (u8 *)&addr, '\0', NULL); + return ret; +} + +static inline int sentry_create_proc_file(const char *name, struct proc_dir_entry *parent, + const struct proc_ops *proc_ops) +{ + int ret = 0; + + if (!proc_create(name, PROC_FILE_PERMISSION, parent, proc_ops)) { + pr_err("create proc file %s failed.\n", name); + ret = -ENOMEM; + } + return ret; +} +#endif diff --git a/drivers/ub/sentry/smh_core.c b/drivers/ub/sentry/smh_core.c new file mode 100644 index 0000000000000000000000000000000000000000..61103c551228da6106abfb7de64badb119ed4077 --- /dev/null +++ b/drivers/ub/sentry/smh_core.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ * + * Description: Sentry Msg Helper + * Author: Luckky + * Create: 2025-02-17 + */ + +#include +#include +#include +#include +#include + +#include "smh_message.h" + +#define SMH_DEV_NAME "sentry_msg_helper" + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][message_helper]: " fmt + +/** + * smh_dev_ioctl - IOCTL handler for sentry message helper device + * @file: File structure pointer + * @cmd: IOCTL command + * @arg: User space argument + * + * Return: 0 on success, negative error code on failure + */ +static long smh_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret = 0; + + switch (cmd) { + case SMH_MSG_ACK: { + struct sentry_msg_helper_msg cmd_msg; + + if (copy_from_user(&cmd_msg, (void __user *)arg, sizeof(cmd_msg))) + return -EFAULT; + + ret = smh_message_ack(&cmd_msg); + break; + } + default: + ret = -EINVAL; + } + + return ret; +} + +/** + * smh_dev_open - Open handler for sentry message helper device + * @inode: Inode structure pointer + * @file: File structure pointer + * + * Return: 0 on success + */ +static int smh_dev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +/** + * smh_dev_flush - Flush handler for sentry message helper device + * @file: File structure pointer + * @owner: File owner ID + * + * Return: 0 on success + */ +static int smh_dev_flush(struct file *file, fl_owner_t owner) +{ + return 0; +} + +/** + * smh_dev_read - Read handler for sentry message helper device + * @filp: File structure pointer + * @buf: User space buffer + * @count: Number of bytes to read + * @f_pos: File position pointer + * + * Return: Number of bytes read on success, zero or negative error code on failure + */ +static ssize_t smh_dev_read(struct file *filp, char __user *buf, size_t count, + loff_t *f_pos) +{ + if (count != sizeof(struct sentry_msg_helper_msg)) { + pr_err("smh_dev_read: read size mismatch\n"); + return 0; + } + + return smh_message_get(buf); +} + +static const struct file_operations smh_dev_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = smh_dev_ioctl, + .open = smh_dev_open, + .read = smh_dev_read, + .flush = smh_dev_flush, +}; + +static struct miscdevice smh_dev_handle = { + .minor = MISC_DYNAMIC_MINOR, + .name = SMH_DEV_NAME, + .fops = &smh_dev_fops, +}; + +/** + * smh_init - Module initialization function + * + * Return: 0 on success, negative error code on failure + */ +static int __init smh_init(void) +{ + int ret; + + ret = smh_message_init(); + if (ret) { + pr_err("Failed to init smh message. retval=%d\n", ret); + return ret; + } + pr_info("smh init successfully.\n"); + + ret = misc_register(&smh_dev_handle); + if (ret) { + pr_err("Failed to register smh device. retval=%d\n", ret); + smh_message_exit(); + return ret; + } + pr_info("smh device registered successfully.\n"); + + return 0; +} + +/** + * smh_exit - Module cleanup function + */ +static void __exit smh_exit(void) +{ + misc_deregister(&smh_dev_handle); + smh_message_exit(); +} + +module_init(smh_init); +module_exit(smh_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Luckky"); +MODULE_DESCRIPTION("SMH: Sentry Msg Helper"); +MODULE_VERSION("1.0"); diff --git a/drivers/ub/sentry/smh_message.c b/drivers/ub/sentry/smh_message.c new file mode 100644 index 0000000000000000000000000000000000000000..971e659244d4c38126d6cca724b073933ebe63ca --- /dev/null +++ b/drivers/ub/sentry/smh_message.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
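/*
 * Illustrative user-space sketch (not part of the patch): how a daemon might
 * consume messages from the misc device registered above and acknowledge one
 * via SMH_MSG_ACK. Assumes the node appears as /dev/sentry_msg_helper (the
 * usual miscdevice naming) and that the daemon is built against a copy of
 * struct sentry_msg_helper_msg and SMH_MSG_ACK taken from smh_common_type.h,
 * which is not an exported uapi header.
 */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int handle_one_message(int fd)
{
	struct sentry_msg_helper_msg msg;

	/* blocks in smh_message_get() until a message is queued */
	if (read(fd, &msg, sizeof(msg)) != (ssize_t)sizeof(msg))
		return -1;

	/* ... act on msg.type / msg.helper_msg_info here ... */

	msg.res = 0;                       /* report the outcome to the sender */
	return ioctl(fd, SMH_MSG_ACK, &msg);
}

int main(void)
{
	int fd = open("/dev/sentry_msg_helper", O_RDWR);

	if (fd < 0)
		return 1;
	while (handle_one_message(fd) == 0)
		;
	close(fd);
	return 0;
}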
+ * + * Description: Sentry Msg Helper + * Author: Luckky + * Create: 2025-02-17 + */ + +#include +#include +#include +#include +#include +#include + +#include "smh_message.h" + +static DEFINE_RATELIMIT_STATE(msg_log_rs, HZ, 10); + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][message_helper]: " fmt + +#define RM_LOG_INFO(fmt, ...) \ + do { \ + if (__ratelimit(&msg_log_rs)) { \ + printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__); \ + } \ + } while (0) + + +#define RM_LOG_WARN(fmt, ...) \ + do { \ + if (__ratelimit(&msg_log_rs)) { \ + printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__); \ + } \ + } while (0) + +#define RM_LOG_ERR(fmt, ...) \ + do { \ + if (__ratelimit(&msg_log_rs)) { \ + printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__); \ + } \ + } while (0) + +#define SMH_MESSAGE_BUFFER_LENGTH 256 +#define SMH_MESSAGE_BUFFER_MAX_LENGTH 4096 + + +static int smh_message_buffer_length = SMH_MESSAGE_BUFFER_LENGTH; +module_param(smh_message_buffer_length, int, 0444); + +/** + * FIND_AND_REMOVE_TIMEOUT_FROM_LIST - Macro to find and remove message from list + * @handle: Pointer to store found handle + * @lock: Spinlock to protect the list + * @list_head: List head to search + * @member: List member name in the structure + * @msgid_target: Target message ID to find + * @found: Boolean to indicate if message was found + * + * This macro searches for a message in the list by ID, removes timeout messages, + * and returns the found message handle. + */ +#define FIND_AND_REMOVE_TIMEOUT_FROM_LIST(handle, lock, list_head, member, msgid_target, found) \ + do { \ + spin_lock(lock); \ + { \ + typeof(handle) __cur, __tmp; \ + list_for_each_entry_safe(__cur, __tmp, list_head, member) { \ + if (check_msg_is_timeout(&__cur->msg)) { \ + list_del(&__cur->member); \ + kfree(__cur); \ + handle = NULL; \ + continue; \ + } \ + if (__cur->msg.msgid == (msgid_target)) { \ + found = true; \ + list_del(&__cur->member); \ + handle = __cur; \ + break; \ + } \ + } \ + } \ + spin_unlock(lock); \ + } while (0) + +struct smh_msg_handler { + struct sentry_msg_helper_msg msg; + bool ack; + struct list_head ack_list; + struct list_head get_list; +}; + +struct smh_msg_ctx { + struct kfifo msgbuf_send; + spinlock_t msgbuf_send_lock; + + struct list_head msgbuf_ack; + spinlock_t msgbuf_ack_lock; + + struct list_head msgbuf_get; + spinlock_t msgbuf_get_lock; + + struct wait_queue_head user_wq; +}; + +static struct smh_msg_ctx msg_ctx; +static atomic64_t message_id_generator; /* [1, message_id_generator] */ + +/** + * smh_get_new_msg_id - Generate a new unique message ID + * + * Return: New message ID + */ +uint64_t smh_get_new_msg_id(void) +{ + return atomic64_inc_return(&message_id_generator); +} +EXPORT_SYMBOL(smh_get_new_msg_id); + +/** + * check_msg_is_timeout - Check if message has timed out + * @msg: Message to check + * + * Return: true if timeout, false otherwise + */ +static bool check_msg_is_timeout(struct sentry_msg_helper_msg *msg) +{ + uint64_t now = ktime_get_ns(); + uint64_t interval_time = (now - msg->start_send_time) / NSEC_PER_MSEC; + + return interval_time > msg->timeout_time; +} + +/** + * smh_message_send - Send a message through the message helper + * @msg: Message to send + * @ack: Whether acknowledgment is required + * + * Return: 0 on success, negative error code on failure + */ +int smh_message_send(struct sentry_msg_helper_msg *msg, bool ack) +{ + int ret = 0; + struct smh_msg_handler *handle; + + if (!msg->msgid) { + RM_LOG_ERR("please set the correct msgid by 'smh_get_new_msg_id', stop to send this msg\n"); + return 
-EINVAL; + } + + handle = kzalloc(sizeof(*handle), GFP_ATOMIC); + if (!handle) { + RM_LOG_ERR("failed to alloc message handle\n"); + return -ENOMEM; + } + + handle->msg = *msg; + handle->ack = ack; + + RM_LOG_INFO("smh_message_send: %llu start\n", msg->msgid); + + ret = kfifo_in_spinlocked(&msg_ctx.msgbuf_send, &handle, + sizeof(handle), &msg_ctx.msgbuf_send_lock); + if (!ret) { + RM_LOG_ERR("error sending message %llu: buffer is full; message dropped\n", + msg->msgid); + kfree(handle); + return -EAGAIN; + } + + /* Check if someone is waiting */ + if (waitqueue_active(&msg_ctx.user_wq)) + wake_up(&msg_ctx.user_wq); + + RM_LOG_INFO("smh_message_send: %llu end\n", msg->msgid); + + return 0; +} +EXPORT_SYMBOL(smh_message_send); + +/** + * smh_message_get - Get a message from the message helper + * @buf: User space buffer to copy message to + * + * Return: Number of bytes copied on success, negative error code on failure + */ +ssize_t smh_message_get(void __user *buf) +{ + int ret; + struct smh_msg_handler *handle = NULL; + struct smh_msg_handler *handle_ack; + DEFINE_WAIT(wait); + + if (waitqueue_active(&msg_ctx.user_wq)) { + RM_LOG_WARN("another process is waiting for message\n"); + return -EPERM; + } + + do { + ret = kfifo_out_spinlocked(&msg_ctx.msgbuf_send, &handle, + sizeof(handle), &msg_ctx.msgbuf_send_lock); + if (ret) { + if (check_msg_is_timeout(&handle->msg)) { + RM_LOG_INFO("smh_message_get: %llu timeout\n", handle->msg.msgid); + kfree(handle); + handle = NULL; + continue; + } + break; + } + + add_wait_queue_exclusive(&msg_ctx.user_wq, &wait); + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + set_current_state(TASK_RUNNING); + remove_wait_queue(&msg_ctx.user_wq, &wait); + if (signal_pending(current)) { + RM_LOG_ERR("error reading message: process receive signal\n"); + return -ERESTART; + } + } while (1); + + if (!handle) + return -ENOMSG; + + RM_LOG_INFO("smh_message_get: get msg, msgid is %llu\n", handle->msg.msgid); + + ret = copy_to_user(buf, &handle->msg, sizeof(handle->msg)); + if (ret) { + RM_LOG_ERR("%s: failed to copy message to user: %d\n", __func__, ret); + ret = kfifo_in_spinlocked(&msg_ctx.msgbuf_send, &handle, + sizeof(handle), &msg_ctx.msgbuf_send_lock); + if (!ret) { + RM_LOG_ERR("error recover message %llu: buffer is full; message dropped\n", + handle->msg.msgid); + kfree(handle); + return -EFAULT; + } + return -EAGAIN; + } + + if (handle->ack) { + bool found = false; + + spin_lock(&msg_ctx.msgbuf_ack_lock); + list_for_each_entry(handle_ack, &msg_ctx.msgbuf_ack, ack_list) { + if (handle_ack->msg.msgid == handle->msg.msgid) { + found = true; + break; + } + } + if (!found) + list_add_tail(&handle->ack_list, &msg_ctx.msgbuf_ack); + spin_unlock(&msg_ctx.msgbuf_ack_lock); + } else { + kfree(handle); + } + + return sizeof(handle->msg); +} + +/** + * smh_message_ack - Acknowledge a message + * @msg: Message to acknowledge + * + * Return: 0 on success, negative error code on failure + */ +int smh_message_ack(struct sentry_msg_helper_msg *msg) +{ + struct smh_msg_handler *handle; + bool found = false; + + RM_LOG_INFO("smh_message_ack: %llu\n", msg->msgid); + + FIND_AND_REMOVE_TIMEOUT_FROM_LIST(handle, &msg_ctx.msgbuf_ack_lock, + &msg_ctx.msgbuf_ack, ack_list, + msg->msgid, found); + + if (!found) { + RM_LOG_ERR("smh_message_ack: %llu not found, maybe this message is not exist or has been timeout\n", + msg->msgid); + return -ENOENT; + } + + handle->msg.res = msg->res; + + spin_lock(&msg_ctx.msgbuf_get_lock); + list_add_tail(&handle->get_list, &msg_ctx.msgbuf_get); + 
spin_unlock(&msg_ctx.msgbuf_get_lock); + + return 0; +} + +/** + * smh_message_get_ack - Get acknowledgment for a message + * @msg: Message to get acknowledgment for + * + * Return: 1 if acknowledgment found, 0 otherwise + */ +int smh_message_get_ack(struct sentry_msg_helper_msg *msg) +{ + struct smh_msg_handler *handle; + bool found = false; + + FIND_AND_REMOVE_TIMEOUT_FROM_LIST(handle, &msg_ctx.msgbuf_get_lock, + &msg_ctx.msgbuf_get, get_list, + msg->msgid, found); + + if (found) { + msg->res = handle->msg.res; + kfree(handle); + } + + return found; +} +EXPORT_SYMBOL(smh_message_get_ack); + +/** + * smh_message_init - Initialize the message helper subsystem + * + * Return: 0 on success, negative error code on failure + */ +int smh_message_init(void) +{ + int ret; + + if (smh_message_buffer_length <= 0 || + smh_message_buffer_length > SMH_MESSAGE_BUFFER_MAX_LENGTH) { + RM_LOG_ERR("invalid smh_message_buffer_length\n"); + return -EINVAL; + } + + ret = kfifo_alloc(&msg_ctx.msgbuf_send, + sizeof(struct smh_msg_handler *) * smh_message_buffer_length, + GFP_KERNEL); + if (ret < 0) { + RM_LOG_ERR("error allocating send message buffer: %d\n", ret); + return ret; + } + spin_lock_init(&msg_ctx.msgbuf_send_lock); + + INIT_LIST_HEAD(&msg_ctx.msgbuf_ack); + spin_lock_init(&msg_ctx.msgbuf_ack_lock); + + INIT_LIST_HEAD(&msg_ctx.msgbuf_get); + spin_lock_init(&msg_ctx.msgbuf_get_lock); + + init_waitqueue_head(&msg_ctx.user_wq); + atomic64_set(&message_id_generator, 0); + + return 0; +} + +/** + * smh_message_exit - Cleanup the message helper subsystem + */ +void smh_message_exit(void) +{ + struct smh_msg_handler *handle, *tmp; + + /* Clean up acknowledgment list */ + spin_lock(&msg_ctx.msgbuf_ack_lock); + list_for_each_entry_safe(handle, tmp, &msg_ctx.msgbuf_ack, ack_list) { + list_del(&handle->ack_list); + kfree(handle); + } + spin_unlock(&msg_ctx.msgbuf_ack_lock); + + /* Clean up get list */ + spin_lock(&msg_ctx.msgbuf_get_lock); + list_for_each_entry_safe(handle, tmp, &msg_ctx.msgbuf_get, get_list) { + list_del(&handle->get_list); + kfree(handle); + } + spin_unlock(&msg_ctx.msgbuf_get_lock); + + kfifo_free(&msg_ctx.msgbuf_send); +} diff --git a/drivers/ub/sentry/smh_message.h b/drivers/ub/sentry/smh_message.h new file mode 100644 index 0000000000000000000000000000000000000000..f9bac3ab6a3597fca1067ca9d26c8fdc3cb27d89 --- /dev/null +++ b/drivers/ub/sentry/smh_message.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
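/*
 * Illustrative kernel-side sketch (not part of the patch): how another module
 * might hand a message to the helper implemented above and poll for its
 * acknowledgement through the exported smh_message_* API. Field conventions
 * follow check_msg_is_timeout(): start_send_time is ktime_get_ns() and
 * timeout_time is in milliseconds. The 5-second timeout, the OOM payload and
 * the 100 ms polling interval are arbitrary example values.
 */
#include <linux/delay.h>
#include <linux/ktime.h>
#include "smh_message.h"

static int smh_send_example(void)
{
	struct sentry_msg_helper_msg msg = { 0 };

	msg.type = SMH_MESSAGE_OOM;
	msg.msgid = smh_get_new_msg_id();
	msg.start_send_time = ktime_get_ns();
	msg.timeout_time = 5000;                  /* ms */
	msg.helper_msg_info.oom_info.nr_nid = 1;
	msg.helper_msg_info.oom_info.nid[0] = 0;

	if (smh_message_send(&msg, true))         /* true: an ack is expected */
		return -EAGAIN;

	/* poll until user space acks via SMH_MSG_ACK or the message times out */
	while (!smh_message_get_ack(&msg)) {
		if (msg.start_send_time + msg.timeout_time * NSEC_PER_MSEC <
		    ktime_get_ns())
			return -ETIMEDOUT;
		msleep(100);
	}

	return (int)msg.res;
}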
+ * Description: Header File for Sentry Msg Helper + * Author: Luckky + * Create: 2025-02-17 + */ + +#ifndef SMH_MESSAGE_H +#define SMH_MESSAGE_H + +#include +#include "smh_common_type.h" + +uint64_t smh_get_new_msg_id(void); +int smh_message_send(struct sentry_msg_helper_msg *msg, bool ack); +ssize_t smh_message_get(void __user *buf); +int smh_message_ack(struct sentry_msg_helper_msg *msg); +int smh_message_get_ack(struct sentry_msg_helper_msg *msg); + +int smh_message_init(void); +void smh_message_exit(void); + +#endif diff --git a/drivers/ub/ubus/Makefile b/drivers/ub/ubus/Makefile index 7456bdb0a78766c7cd02bbc2ce07ca412da9dbf2..59505977dd2f3e15e2ce3f7f7ab94a5651c67cb2 100644 --- a/drivers/ub/ubus/Makefile +++ b/drivers/ub/ubus/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_UB_UBUS) += msi/ ubus-y := ubus_driver.o sysfs.o ubus_controller.o msg.o ubus_config.o port.o cc.o eid.o cna.o route.o ubus-y += enum.o resource.o ubus_entity.o reset.o cap.o interrupt.o decoder.o omm.o ioctl.o eu.o link.o -ubus-y += instance.o pool.o +ubus-y += instance.o pool.o memory.o ubus-y += services/ras.o services/service.o services/gucd.o ubus-y += services/hotplug/hotplug_core.o services/hotplug/hotplug_ctrl.o diff --git a/drivers/ub/ubus/memory.c b/drivers/ub/ubus/memory.c new file mode 100644 index 0000000000000000000000000000000000000000..e7b3144db4bd4974b9d627c51dc5c752b6c3f1d4 --- /dev/null +++ b/drivers/ub/ubus/memory.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved. + */ + +#define pr_fmt(fmt) "ubus memory: " fmt + +#include +#include "ubus.h" +#include "ubus_controller.h" +#include "memory.h" + +static ubmem_ras_handler handler; + +static bool ub_mem_uent_valid(struct ub_entity *uent) +{ + if (!uent || !uent->ubc) + return false; + + return is_ibus_controller(uent); +} + +void ub_mem_decoder_init(struct ub_entity *uent) +{ + struct ub_bus_controller *ubc; + int ret; + + if (!ub_mem_uent_valid(uent)) + return; + + ubc = uent->ubc; + if (ubc->ops && ubc->ops->mem_decoder_create) { + ret = ubc->ops->mem_decoder_create(ubc); + WARN_ON(ret); + } else { + dev_warn(&ubc->dev, + "ubc ops or ubc ops mem_decoder_create is null.\n"); + } +} + +void ub_mem_decoder_uninit(struct ub_entity *uent) +{ + struct ub_bus_controller *ubc; + + if (!ub_mem_uent_valid(uent)) + return; + + ubc = uent->ubc; + if (ubc->ops && ubc->ops->mem_decoder_remove) + ubc->ops->mem_decoder_remove(ubc); + else + dev_warn(&ubc->dev, "ubc ops mem_decoder_remove is null.\n"); +} + +void ub_mem_ras_handler_register(ubmem_ras_handler rh) +{ + handler = rh; +} +EXPORT_SYMBOL_GPL(ub_mem_ras_handler_register); + +void ub_mem_ras_handler_unregister(void) +{ + handler = NULL; +} +EXPORT_SYMBOL_GPL(ub_mem_ras_handler_unregister); + +ubmem_ras_handler ub_mem_ras_handler_get(void) +{ + return handler; +} +EXPORT_SYMBOL_GPL(ub_mem_ras_handler_get); + +void ub_mem_init_usi(struct ub_entity *uent) +{ + if (!uent->ubc) { + pr_err("ubc not exist, can't init usi\n"); + return; + } + + if (uent->ubc->ops && uent->ubc->ops->register_ubmem_irq) + uent->ubc->ops->register_ubmem_irq(uent->ubc); + else + dev_warn(&uent->ubc->dev, "ubc ops register_ubmem_irq is null.\n"); +} + +void ub_mem_uninit_usi(struct ub_entity *uent) +{ + if (!uent->ubc) { + pr_err("ubc not exist, can't uninit usi\n"); + return; + } + + if (uent->ubc->ops && uent->ubc->ops->unregister_ubmem_irq) + uent->ubc->ops->unregister_ubmem_irq(uent->ubc); + else + dev_warn(&uent->ubc->dev, "ubc ops unregister_ubmem_irq 
is null.\n"); +} + +void ub_mem_drain_start(u32 scna) +{ + struct ub_mem_device *mem_device; + struct ub_bus_controller *ubc; + + ubc = ub_find_bus_controller_by_cna(scna); + if (!ubc) { + pr_err("No ubc has cna of %u\n", scna); + return; + } + + mem_device = ubc->mem_device; + if (!mem_device) { + dev_err(&ubc->dev, "ubc mem_device is null.\n"); + return; + } + + if (mem_device->ops && mem_device->ops->mem_drain_start) + mem_device->ops->mem_drain_start(mem_device); + else + dev_warn(mem_device->dev, "ub mem_device ops mem_drain_start is null.\n"); +} +EXPORT_SYMBOL_GPL(ub_mem_drain_start); + +int ub_mem_drain_state(u32 scna) +{ + struct ub_mem_device *mem_device; + struct ub_bus_controller *ubc; + + ubc = ub_find_bus_controller_by_cna(scna); + if (!ubc) { + pr_err("No ubc has cna of %u\n", scna); + return -ENODEV; + } + + mem_device = ubc->mem_device; + if (!mem_device) { + dev_err(&ubc->dev, "ubc mem_device is null.\n"); + return -EINVAL; + } + + if (mem_device->ops && mem_device->ops->mem_drain_state) + return mem_device->ops->mem_drain_state(mem_device); + + dev_warn(mem_device->dev, "ub memory decoder ops mem_drain_state is null.\n"); + return 0; +} +EXPORT_SYMBOL_GPL(ub_mem_drain_state); + +int ub_mem_get_numa_id(u32 scna) +{ + struct ub_bus_controller *ubc; + + ubc = ub_find_bus_controller_by_cna(scna); + if (!ubc) { + pr_err("No ubc has cna of %u\n", scna); + return NUMA_NO_NODE; + } + + return pxm_to_node(ubc->attr.proximity_domain); +} +EXPORT_SYMBOL_GPL(ub_mem_get_numa_id); + +bool ub_memory_validate_pa(u32 scna, u64 pa_start, u64 pa_end, bool cacheable) +{ + struct ub_mem_device *mem_device; + struct ub_bus_controller *ubc; + + ubc = ub_find_bus_controller_by_cna(scna); + if (!ubc) { + pr_err("No ubc has cna of %u\n", scna); + return false; + } + + mem_device = ubc->mem_device; + if (!mem_device) { + dev_err(&ubc->dev, "ubc mem_device is null.\n"); + return false; + } + + if (mem_device->ops && mem_device->ops->mem_validate_pa) + return mem_device->ops->mem_validate_pa(ubc, pa_start, pa_end, + cacheable); + + dev_warn(mem_device->dev, "ub memory decoder ops mem_drain_state is null.\n"); + return false; +} +EXPORT_SYMBOL_GPL(ub_memory_validate_pa); diff --git a/drivers/ub/ubus/memory.h b/drivers/ub/ubus/memory.h new file mode 100644 index 0000000000000000000000000000000000000000..7c841b466f3ef3693b6b061590d0a2ef12694731 --- /dev/null +++ b/drivers/ub/ubus/memory.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved. 
+ */ + +#ifndef __MEMORY_H__ +#define __MEMORY_H__ + +#include +#include +#include +#include + +#define MAX_RAS_ERROR_SOURCES_CNT 256 + +void ub_mem_decoder_init(struct ub_entity *uent); +void ub_mem_decoder_uninit(struct ub_entity *uent); +void ub_mem_init_usi(struct ub_entity *uent); +void ub_mem_uninit_usi(struct ub_entity *uent); + +struct ub_mem_ras_err_info { + enum ras_err_type type; + u64 hpa; +}; + +struct ub_mem_ras_ctx { + DECLARE_KFIFO(ras_fifo, struct ub_mem_ras_err_info, + MAX_RAS_ERROR_SOURCES_CNT); +}; + +struct ub_mem_device_ops { + void (*mem_drain_start)(struct ub_mem_device *mem_device); + int (*mem_drain_state)(struct ub_mem_device *mem_device); + bool (*mem_validate_pa)(struct ub_bus_controller *ubc, u64 pa_start, + u64 pa_end, bool cacheable); + + KABI_RESERVE(1) + KABI_RESERVE(2) + KABI_RESERVE(3) + KABI_RESERVE(4) + KABI_RESERVE(5) + KABI_RESERVE(6) + KABI_RESERVE(7) + KABI_RESERVE(8) +}; + +struct ub_mem_device { + struct device *dev; + struct ub_entity *uent; + struct ub_mem_ras_ctx ras_ctx; + int ubmem_irq_num; + const struct ub_mem_device_ops *ops; + void *priv_data; + + KABI_RESERVE(1) + KABI_RESERVE(2) +}; + +#endif /* __MEMORY_H__ */ diff --git a/drivers/ub/ubus/services/gucd.c b/drivers/ub/ubus/services/gucd.c index 35a0cf35e20a4488219c8e0b5fe436f4c5695b9c..ca5f0a3578e8dc5e03373c4ae2aadafacd903778 100644 --- a/drivers/ub/ubus/services/gucd.c +++ b/drivers/ub/ubus/services/gucd.c @@ -8,6 +8,7 @@ #include "../ubus.h" #include "../decoder.h" #include "../ubus_driver.h" +#include "../memory.h" #include "service.h" static const struct ub_device_id component_device_ids[] = { @@ -128,15 +129,18 @@ static void ub_setup_bus_controller(struct ub_entity *uent) return; } - if ((u32)usi_count < vec_num_max) + if ((u32)usi_count < vec_num_max) { ub_err(uent, "alloc irq vectors failed, usi count=%d, vec_num_max=%u\n", usi_count, vec_num_max); - else + } else { ub_init_decoder_usi(uent); + ub_mem_init_usi(uent); + } } static void ub_unset_bus_controller(struct ub_entity *uent) { + ub_mem_uninit_usi(uent); ub_uninit_decoder_usi(uent); ub_disable_intr(uent); ub_disable_err_msq_ctrl(uent); diff --git a/drivers/ub/ubus/ubus_controller.h b/drivers/ub/ubus/ubus_controller.h index ae31e7c452380e7872b32b0808e38c8eb83b4300..4b3c7a74a41407bcd5cbbb5827acb45a4fda4f87 100644 --- a/drivers/ub/ubus/ubus_controller.h +++ b/drivers/ub/ubus/ubus_controller.h @@ -11,6 +11,10 @@ struct ub_bus_controller_ops { int (*eu_table_init)(struct ub_bus_controller *ubc); void (*eu_table_uninit)(struct ub_bus_controller *ubc); int (*eu_cfg)(struct ub_bus_controller *ubc, bool flag, u32 eid, u16 upi); + int (*mem_decoder_create)(struct ub_bus_controller *ubc); + void (*mem_decoder_remove)(struct ub_bus_controller *ubc); + void (*register_ubmem_irq)(struct ub_bus_controller *ubc); + void (*unregister_ubmem_irq)(struct ub_bus_controller *ubc); void (*register_decoder_base_addr)(struct ub_bus_controller *ubc, u64 *cmd_queue, u64 *event_queue); int (*entity_enable)(struct ub_entity *uent, u8 enable); diff --git a/drivers/ub/ubus/ubus_entity.c b/drivers/ub/ubus/ubus_entity.c index dc9bccff9044e55ad14a29b91d778905dd34c1fc..fcea27373ccb2cea88d7b4d84906bf702a006ca6 100644 --- a/drivers/ub/ubus/ubus_entity.c +++ b/drivers/ub/ubus/ubus_entity.c @@ -17,6 +17,7 @@ #include "eid.h" #include "cna.h" #include "resource.h" +#include "memory.h" #include "ubus_controller.h" #include "ubus_driver.h" #include "ubus_inner.h" @@ -438,6 +439,7 @@ void ub_start_ent(struct ub_entity *uent) WARN_ON(ret); 
ub_create_sysfs_dev_files(uent); + ub_mem_decoder_init(uent); if (!((is_p_device(uent) || is_p_idevice(uent)) && is_dynamic(uent->bi))) { uent->match_driver = true; @@ -531,6 +533,7 @@ void ub_remove_ent(struct ub_entity *uent) list_del(&uent->node); up_write(&ub_bus_sem); + ub_mem_decoder_uninit(uent); ub_uninit_capabilities(uent); ub_unconfigure_ent(uent); ub_entity_unset_mmio(uent); diff --git a/drivers/ub/ubus/vendor/hisilicon/Makefile b/drivers/ub/ubus/vendor/hisilicon/Makefile index 653de75f253922848168ab7d5989b93d832f38ef..998c0e09aeef1019f8ad67987bc616dacb1a7005 100644 --- a/drivers/ub/ubus/vendor/hisilicon/Makefile +++ b/drivers/ub/ubus/vendor/hisilicon/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ hisi_ubus-objs := hisi-ubus.o controller.o vdm.o local-ras.o msg.o msg-core.o -hisi_ubus-objs += msg-debugfs.o eu-table.o +hisi_ubus-objs += msg-debugfs.o eu-table.o memory.o obj-$(CONFIG_UB_HISI_UBUS) += hisi_ubus.o diff --git a/drivers/ub/ubus/vendor/hisilicon/controller.c b/drivers/ub/ubus/vendor/hisilicon/controller.c index 52d4086d39b2a7b54863d82c0a198a293ec74c68..d7ea5c118d32bbf04e81ccf1dce37666b0890cf4 100644 --- a/drivers/ub/ubus/vendor/hisilicon/controller.c +++ b/drivers/ub/ubus/vendor/hisilicon/controller.c @@ -17,6 +17,10 @@ static struct ub_bus_controller_ops hi_ubc_ops = { .eu_table_init = hi_eu_table_init, .eu_table_uninit = hi_eu_table_uninit, .eu_cfg = hi_eu_cfg, + .mem_decoder_create = hi_mem_decoder_create, + .mem_decoder_remove = hi_mem_decoder_remove, + .register_ubmem_irq = hi_register_ubmem_irq, + .unregister_ubmem_irq = hi_unregister_ubmem_irq, .register_decoder_base_addr = hi_register_decoder_base_addr, .entity_enable = hi_send_entity_enable_msg, }; diff --git a/drivers/ub/ubus/vendor/hisilicon/hisi-msg.h b/drivers/ub/ubus/vendor/hisilicon/hisi-msg.h index 78476423bdfb5fb1cb100025c7799d36647db601..d1a68934d174b8fcd27c77003d80f479cb156e38 100644 --- a/drivers/ub/ubus/vendor/hisilicon/hisi-msg.h +++ b/drivers/ub/ubus/vendor/hisilicon/hisi-msg.h @@ -59,6 +59,7 @@ enum hi_task_type { enum hi_msgq_private_opcode { EU_TABLE_CFG_CMD = 2, + GET_UBMEM_EVENT_CMD = 4 }; enum hi_msgq_user { diff --git a/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h b/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h index ae86a0e22ff1b6df24dff41d15a7b1c5f0723048..9aa3ba5521c1bd5725eaf5d4e6328cbc7ff9850e 100644 --- a/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h +++ b/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h @@ -35,6 +35,10 @@ struct hi_ubc_private_data { int hi_eu_table_init(struct ub_bus_controller *ubc); void hi_eu_table_uninit(struct ub_bus_controller *ubc); int hi_eu_cfg(struct ub_bus_controller *ubc, bool add, u32 eid, u16 upi); +int hi_mem_decoder_create(struct ub_bus_controller *ubc); +void hi_mem_decoder_remove(struct ub_bus_controller *ubc); +void hi_register_ubmem_irq(struct ub_bus_controller *ubc); +void hi_unregister_ubmem_irq(struct ub_bus_controller *ubc); void hi_register_decoder_base_addr(struct ub_bus_controller *ubc, u64 *cmd_queue, u64 *event_queue); int hi_send_entity_enable_msg(struct ub_entity *uent, u8 enable); diff --git a/drivers/ub/ubus/vendor/hisilicon/memory.c b/drivers/ub/ubus/vendor/hisilicon/memory.c new file mode 100644 index 0000000000000000000000000000000000000000..4d4f80f847fca2456f81f2d3cb6593b9da361ca7 --- /dev/null +++ b/drivers/ub/ubus/vendor/hisilicon/memory.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved. 
+ */ + +#define pr_fmt(fmt) "ubus hisi memory: " fmt + +#include +#include + +#include "../../ubus.h" +#include "../../msg.h" +#include "../../memory.h" +#include "hisi-msg.h" +#include "hisi-ubus.h" +#define CREATE_TRACE_POINTS +#include "memory_trace.h" + +#define DRAIN_ENABLE_REG_OFFSET 0x24 +#define DRAIN_STATE_REG_OFFSET 0x28 + +#define HI_GET_UBMEM_EVENT_REQ_SIZE 4 +#define HI_GET_UBMEM_EVENT_RSP_SIZE 772 +#define MEM_EVENT_MAX_NUM 16 +#define MAR_ERR_ADDR_COUNT 10 +#define MAR_ERR_ADDR_SIZE 2 + +#define hpa_gen(addr_h, addr_l) (((u64)(addr_h) << 32) | (addr_l)) + +struct ub_mem_decoder { + struct device *dev; + struct ub_entity *uent; + void *base_reg; +}; + +struct hi_ubmem_event { + u32 device_ras_status3; + u32 device_ras_status4; + u32 err_addr[MAR_ERR_ADDR_COUNT]; +}; + +struct hi_get_ubmem_event_rsp { + u32 event_num; + struct hi_ubmem_event event_info[MEM_EVENT_MAX_NUM]; +}; + +struct hi_get_ubmem_event_req { + u32 rsv0; +}; + +struct hi_get_ubmem_event_pld { + union { + struct hi_get_ubmem_event_req req; + struct hi_get_ubmem_event_rsp rsp; + }; +}; + +static bool hi_mem_validate_pa(struct ub_bus_controller *ubc, + u64 pa_start, u64 pa_end, bool cacheable); + +static void hi_mem_drain_start(struct ub_mem_device *mem_device) +{ + struct ub_mem_decoder *decoder, *data = mem_device->priv_data; + + if (!data) { + dev_err(mem_device->dev, "ubc mem_decoder is null.\n"); + return; + } + + for (int i = 0; i < MEM_INFO_NUM; i++) { + decoder = &data[i]; + writel(0, decoder->base_reg + DRAIN_ENABLE_REG_OFFSET); + writel(1, decoder->base_reg + DRAIN_ENABLE_REG_OFFSET); + } +} + +static int hi_mem_drain_state(struct ub_mem_device *mem_device) +{ + struct ub_mem_decoder *decoder, *data = mem_device->priv_data; + int val = 0; + + if (!data) { + dev_err(mem_device->dev, "ubc mem_decoder is null.\n"); + return 0; + } + + for (int i = 0; i < MEM_INFO_NUM; i++) { + decoder = &data[i]; + val = readb(decoder->base_reg + DRAIN_STATE_REG_OFFSET) & 0x1; + dev_info_ratelimited(decoder->dev, "ub memory decoder[%d] drain state, val=%d\n", + i, val); + if (!val) + return val; + } + + return val; +} + +static const struct ub_mem_device_ops device_ops = { + .mem_drain_start = hi_mem_drain_start, + .mem_drain_state = hi_mem_drain_state, + .mem_validate_pa = hi_mem_validate_pa, +}; + +static int save_ras_err_info(struct ub_mem_device *mem_device, + enum ras_err_type type, u64 hpa) +{ + struct ub_mem_ras_err_info err_info = { + .type = type, + .hpa = hpa, + }; + + if (!kfifo_put(&mem_device->ras_ctx.ras_fifo, err_info)) { + dev_err(mem_device->dev, "kfifo put failed!\n"); + return -ENOMEM; + } + + return 0; +} + +static irqreturn_t hi_mem_ras_isr(int irq, void *context) +{ + struct ub_bus_controller *ubc = (struct ub_bus_controller *)context; + struct ub_mem_ras_ctx *ras_ctx = &ubc->mem_device->ras_ctx; + struct ub_mem_ras_err_info err_info; + ubmem_ras_handler handler; + int ret; + + handler = ub_mem_ras_handler_get(); + while (kfifo_get(&ras_ctx->ras_fifo, &err_info)) { + trace_mem_ras_event(ubc->mem_device, &err_info); + pr_info("ras: type=%u\n", err_info.type); + if (handler) { + ret = handler(err_info.hpa, err_info.type); + WARN_ON(ret); + } + } + + return IRQ_HANDLED; +} + +static int err_type_bitmap[] = { + /* DEVICE_RAS_STATUS_3 */ + [UB_MEM_ATOMIC_DATA_ERR] = 31, + [UB_MEM_READ_DATA_ERR] = 28, + [UB_MEM_FLOW_POISON] = 27, + [UB_MEM_FLOW_READ_AUTH_POISON] = 23, + [UB_MEM_FLOW_READ_AUTH_RESPERR] = 22, + [UB_MEM_TIMEOUT_POISON] = 21, + [UB_MEM_TIMEOUT_RESPERR] = 20, + [UB_MEM_READ_DATA_POISON] = 19, + 
[UB_MEM_READ_DATA_RESPERR] = 18, + /* DEVICE_RAS_STATUS_4 */ + [MAR_NOPORT_VLD_INT_ERR] = 26, + [MAR_FLUX_INT_ERR] = 25, + [MAR_WITHOUT_CXT_ERR] = 24, + [RSP_BKPRE_OVER_TIMEOUT_ERR] = 10, + /* DEVICE_RAS_STATUS_4 need save addr */ + [MAR_NEAR_AUTH_FAIL_ERR] = 21, + [MAR_FAR_AUTH_FAIL_ERR] = 22, + [MAR_TIMEOUT_ERR] = 23, + [MAR_ILLEGAL_ACCESS_ERR] = 9, + [REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR] = 11, +}; + +static int save_ras_err_info_all(struct ub_bus_controller *ubc, struct hi_ubmem_event *info) +{ + unsigned long status3_bitmap = (unsigned long)info->device_ras_status3; + unsigned long status4_bitmap = (unsigned long)info->device_ras_status4; + u32 addr_h, addr_l; + int ret = -EINVAL; + u64 hpa = 0; + int index; + int i; + + for (i = UB_MEM_ATOMIC_DATA_ERR; i <= UB_MEM_READ_DATA_RESPERR; i++) { + if (test_bit(err_type_bitmap[i], &status3_bitmap)) { + ret = save_ras_err_info(ubc->mem_device, (enum ras_err_type)i, hpa); + if (ret) + return ret; + } + } + + for (i = MAR_FLUX_INT_ERR; i <= RSP_BKPRE_OVER_TIMEOUT_ERR; i++) { + if (test_bit(err_type_bitmap[i], &status4_bitmap)) { + ret = save_ras_err_info(ubc->mem_device, (enum ras_err_type)i, hpa); + if (ret) + return ret; + } + } + + for (i = MAR_NEAR_AUTH_FAIL_ERR; i <= REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR; i++) { + if (test_bit(err_type_bitmap[i], &status4_bitmap)) { + index = MAR_ERR_ADDR_SIZE * (i - MAR_NEAR_AUTH_FAIL_ERR); + addr_h = info->err_addr[index + 1]; + addr_l = info->err_addr[index]; + hpa = hpa_gen(addr_h, addr_l); + ret = save_ras_err_info(ubc->mem_device, (enum ras_err_type)i, hpa); + if (ret) + return ret; + } + } + + /* if no_port_vld and near_auth_fail report at the same time, ignore no_port_vld */ + if (test_bit(err_type_bitmap[MAR_NOPORT_VLD_INT_ERR], &status4_bitmap) && + !test_bit(err_type_bitmap[MAR_NEAR_AUTH_FAIL_ERR], &status4_bitmap)) { + i = MAR_NOPORT_VLD_INT_ERR; + ret = save_ras_err_info(ubc->mem_device, (enum ras_err_type)i, hpa); + } + + return ret; +} + +static irqreturn_t hi_mem_ras_irq(int irq, void *context) +{ + struct ub_bus_controller *ubc = (struct ub_bus_controller *)context; + struct hi_get_ubmem_event_pld pld = {}; + struct msg_info info = {}; + u32 event_cnt; + int ret; + + message_info_init(&info, ubc->uent, &pld, &pld, + (HI_GET_UBMEM_EVENT_REQ_SIZE << MSG_REQ_SIZE_OFFSET) | + HI_GET_UBMEM_EVENT_RSP_SIZE); + ret = hi_message_private(ubc->mdev, &info, GET_UBMEM_EVENT_CMD); + if (ret) { + dev_err(&ubc->dev, "get ubmem event failed, ret=%d\n", + ret); + return IRQ_HANDLED; + } + + event_cnt = pld.rsp.event_num; + if (event_cnt == 0 || event_cnt > MEM_EVENT_MAX_NUM) { + dev_err(&ubc->dev, "event_cnt [%u] is invalid\n", event_cnt); + return IRQ_HANDLED; + } + + for (u32 i = 0; i < event_cnt; i++) { + ret = save_ras_err_info_all(ubc, &pld.rsp.event_info[i]); + if (ret == -EINVAL) { + dev_err(&ubc->dev, "save_ras_err_info failed, ret=%d\n", ret); + return IRQ_HANDLED; + } + } + + return IRQ_WAKE_THREAD; +} + +static int hi_mem_decoder_create_one(struct ub_bus_controller *ubc, int mar_id) +{ + struct hi_ubc_private_data *data = (struct hi_ubc_private_data *)ubc->data; + struct ub_mem_decoder *decoder, *priv_data = ubc->mem_device->priv_data; + + decoder = &priv_data[mar_id]; + decoder->dev = &ubc->dev; + decoder->uent = ubc->uent; + + decoder->base_reg = ioremap(data->mem_pa_info[mar_id].decode_addr, + SZ_64); + if (!decoder->base_reg) { + dev_err(decoder->dev, "ub mem decoder base reg ioremap failed.\n"); + return -ENOMEM; + } + + return 0; +} + +static void 
hi_mem_decoder_remove_one(struct ub_bus_controller *ubc, int mar_id) +{ + struct ub_mem_decoder *priv_data = ubc->mem_device->priv_data; + + iounmap(priv_data[mar_id].base_reg); +} + +int hi_mem_decoder_create(struct ub_bus_controller *ubc) +{ + struct ub_mem_device *mem_device; + void *priv_data; + int ret; + + mem_device = kzalloc(sizeof(*mem_device), GFP_KERNEL); + if (!mem_device) + return -ENOMEM; + + priv_data = kcalloc(MEM_INFO_NUM, sizeof(struct ub_mem_decoder), + GFP_KERNEL); + if (!priv_data) { + kfree(mem_device); + return -ENOMEM; + } + + mem_device->dev = &ubc->dev; + mem_device->uent = ubc->uent; + mem_device->ubmem_irq_num = -1; + mem_device->ops = &device_ops; + mem_device->priv_data = priv_data; + ubc->mem_device = mem_device; + + for (int i = 0; i < MEM_INFO_NUM; i++) { + ret = hi_mem_decoder_create_one(ubc, i); + if (ret) { + dev_err(&ubc->dev, "hi mem create decoder %d failed\n", i); + for (int j = i - 1; j >= 0; j--) + hi_mem_decoder_remove_one(ubc, j); + + kfree(mem_device->priv_data); + kfree(mem_device); + ubc->mem_device = NULL; + return ret; + } + } + + return ret; +} + +void hi_mem_decoder_remove(struct ub_bus_controller *ubc) +{ + if (!ubc->mem_device) + return; + + for (int i = 0; i < MEM_INFO_NUM; i++) + hi_mem_decoder_remove_one(ubc, i); + + kfree(ubc->mem_device->priv_data); + kfree(ubc->mem_device); + ubc->mem_device = NULL; +} + +void hi_register_ubmem_irq(struct ub_bus_controller *ubc) +{ + struct ub_entity *uent = ubc->uent; + int irq_num, ret; + u32 usi_idx; + + if (!ubc->mem_device) { + pr_err("mem device is NULL!\n"); + return; + } + + ret = ub_cfg_read_dword(uent, UB_MEM_USI_IDX, &usi_idx); + if (ret) { + ub_err(uent, "get ubmem usi idx failed, ret=%d\n", ret); + return; + } + + irq_num = ub_irq_vector(uent, usi_idx); + if (irq_num < 0) { + ub_err(uent, "ub get irq vector failed, irq num=%d\n", irq_num); + return; + } + + INIT_KFIFO(ubc->mem_device->ras_ctx.ras_fifo); + + ret = request_threaded_irq(irq_num, hi_mem_ras_irq, + hi_mem_ras_isr, IRQF_SHARED, + "ub_mem_event", ubc); + if (ret) { + ub_err(uent, "ubmem request_irq failed, ret=%d\n", ret); + return; + } + + ubc->mem_device->ubmem_irq_num = irq_num; +} + +void hi_unregister_ubmem_irq(struct ub_bus_controller *ubc) +{ + int irq_num; + + if (!ubc->mem_device) { + dev_err(&ubc->dev, "mem device is NULL!\n"); + return; + } + + irq_num = ubc->mem_device->ubmem_irq_num; + if (irq_num < 0) + return; + + free_irq((unsigned int)irq_num, (void *)ubc); +} + +#define MB_SIZE_OFFSET 20 + +static bool ub_hpa_valid(u64 pa_start, u64 pa_end, u32 base_addr, u32 size) +{ + if (pa_start >= ((u64)base_addr << MB_SIZE_OFFSET) && + pa_end < (((u64)base_addr + (u64)size) << MB_SIZE_OFFSET)) + return true; + + return false; +} + +static bool hi_mem_validate_pa(struct ub_bus_controller *ubc, + u64 pa_start, u64 pa_end, bool cacheable) +{ + struct hi_ubc_private_data *data; + + if (!ubc->data) { + dev_err(&ubc->dev, "Ubc data is null.\n"); + return false; + } + + if (pa_end < pa_start) { + dev_err(&ubc->dev, "pa_start is over pa_end.\n"); + return false; + } + + data = (struct hi_ubc_private_data *)ubc->data; + for (u16 i = 0; i < MEM_INFO_NUM; i++) { + if (ub_hpa_valid(pa_start, pa_end, + data->mem_pa_info[i].cc_base_addr, + data->mem_pa_info[i].cc_base_size) && + cacheable) + return true; + + if (ub_hpa_valid(pa_start, pa_end, + data->mem_pa_info[i].nc_base_addr, + data->mem_pa_info[i].nc_base_size) && + !cacheable) + return true; + } + + dev_err(&ubc->dev, "pa_start-pa_end is invalid.\n"); + return false; +} diff 
--git a/drivers/ub/ubus/vendor/hisilicon/memory_trace.h b/drivers/ub/ubus/vendor/hisilicon/memory_trace.h new file mode 100644 index 0000000000000000000000000000000000000000..9204d94aa6b8529ec92b7789ae01b687571dd69e --- /dev/null +++ b/drivers/ub/ubus/vendor/hisilicon/memory_trace.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved. + */ + +/* This must be outside ifdef __HISI_MEMORY_TRACE_H__ */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ub_memory + +#if !defined(__HISI_MEMORY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __HISI_MEMORY_TRACE_H__ + +#include + +TRACE_EVENT(mem_ras_event, + TP_PROTO(struct ub_mem_device *device, struct ub_mem_ras_err_info *info), + TP_ARGS(device, info), + + TP_STRUCT__entry( + __field(u32, eid) + __field(u32, cna) + __field(u8, type) + __field(u64, hpa) + ), + + TP_fast_assign( + __entry->eid = device->uent->eid; + __entry->cna = device->uent->cna; + __entry->type = (u8)info->type; + __entry->hpa = info->hpa; + ), + + TP_printk( + "%u-%u-%u-%llu", __entry->eid, __entry->cna, + __entry->type, __entry->hpa + ) +); + +#endif /* __HISI_MEMORY_TRACE_H__ */ + +/* This must be outside ifdef __HISI_MEMORY_TRACE_H__ */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/ub/ubus/vendor/hisilicon +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE memory_trace +#include diff --git a/include/acpi/button.h b/include/acpi/button.h index af2fce5d2ee33bec227cfb6e0b25049ce060320d..789ed88d0d57c9a12216ae462024aba325c7bf12 100644 --- a/include/acpi/button.h +++ b/include/acpi/button.h @@ -2,6 +2,8 @@ #ifndef ACPI_BUTTON_H #define ACPI_BUTTON_H +#include + #define ACPI_BUTTON_HID_POWER "PNP0C0C" #define ACPI_BUTTON_HID_LID "PNP0C0D" #define ACPI_BUTTON_HID_SLEEP "PNP0C0E" @@ -15,4 +17,18 @@ static inline int acpi_lid_open(void) } #endif /* IS_ENABLED(CONFIG_ACPI_BUTTON) */ +#if IS_ENABLED(CONFIG_ACPI_POWER_NOTIFIER_CHAIN) +int register_acpi_power_notifier(struct notifier_block *nb); +int unregister_acpi_power_notifier(struct notifier_block *nb); +#else +static inline int register_acpi_power_notifier(struct notifier_block *nb) +{ + return -EINVAL; +}; +static inline int unregister_acpi_power_notifier(struct notifier_block *nb) +{ + return -EINVAL; +}; +#endif /* IS_ENABLED(CONFIG_ACPI_POWER_NOTIFIER_CHAIN) */ + #endif /* ACPI_BUTTON_H */ diff --git a/include/linux/firmware/uvb/cis.h b/include/linux/firmware/uvb/cis.h new file mode 100644 index 0000000000000000000000000000000000000000..a8418a7b53a0d50fdbae221722894b25bd9609f1 --- /dev/null +++ b/include/linux/firmware/uvb/cis.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
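/*
 * Illustrative sketch (not part of the patch): a client of the ACPI power
 * notifier chain declared in include/acpi/button.h above. The callback uses
 * the standard notifier_call prototype; returning NOTIFY_DONE lets handling
 * continue, while NOTIFY_BAD is the conventional way for a notifier to ask
 * for a veto. Whether a veto is honoured is up to the chain's caller.
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <acpi/button.h>

static int example_power_cb(struct notifier_block *nb, unsigned long action,
			    void *data)
{
	pr_info("power button event observed (action=%lu)\n", action);
	return NOTIFY_DONE;                /* or NOTIFY_BAD to request a veto */
}

static struct notifier_block example_power_nb = {
	.notifier_call = example_power_cb,
};

static int __init example_power_init(void)
{
	/* the stub returns -EINVAL when CONFIG_ACPI_POWER_NOTIFIER_CHAIN is off */
	return register_acpi_power_notifier(&example_power_nb);
}

static void __exit example_power_exit(void)
{
	unregister_acpi_power_notifier(&example_power_nb);
}

module_init(example_power_init);
module_exit(example_power_exit);
MODULE_LICENSE("GPL");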
+ * Description: cis header + * Create: 2025-04-18 + */ + +#ifndef CIS_H +#define CIS_H +#include + +// Call ID +#define UBIOS_CALL_ID_FLAG 0x3 +#define UBIOS_CALL_ID_PANIC_CALL 0xc00b2010 +#define UBIOS_CALL_ID_GET_DEVICE_INFO 0xc00b0b26 + +// User ID format +#define UBIOS_USER_ID_NO (0x00 << 24) +#define UBIOS_USER_ID_BIOS (0x01 << 24) +#define UBIOS_USER_ID_BMC (0x0B << 24) +#define UBIOS_USER_ID_UB_DEVICE (0x10 << 24) +#define UBIOS_USER_ID_INTERGRATED_UB_DEVICE (0x11 << 24) +#define UBIOS_USER_ID_RICH_OS (0x20 << 24) +#define UBIOS_USER_ID_TRUST_OS (0x30 << 24) +#define UBIOS_USER_ID_PCIE_DEVICE (0x40 << 24) +#define UBIOS_USER_ID_INTERGRATED_PCIE_DEVICE (0x41 << 24) +#define UBIOS_USER_ID_ALL (0xFF << 24) +#define UBIOS_USER_TYPE_MASK UBIOS_USER_ID_ALL +#define UBIOS_USER_INDEX_MASK ((u32)(~UBIOS_USER_TYPE_MASK)) + +#define UBIOS_MY_USER_ID UBIOS_USER_ID_INTERGRATED_UB_DEVICE +#define UBIOS_GET_MESSAGE_FLAG(user_id) ((u32)((user_id) >> 30)) + +struct cis_message { + void *input; + u32 input_size; + void *output; + u32 *p_output_size; +}; + +// cis call +int cis_call_by_uvb(u32 call_id, u32 sender_id, + u32 receiver_id, struct cis_message *msg, bool is_sync); +int uvb_polling_sync(void *data); + +// cis register +typedef int (*msg_handler)(struct cis_message *msg); +int register_local_cis_func(u32 call_id, u32 receiver_id, msg_handler func); +int unregister_local_cis_func(u32 call_id, u32 receiver_id); + +#endif diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h new file mode 100644 index 0000000000000000000000000000000000000000..a958092e5687f3f474ea6f8c188fb920f755b2f1 --- /dev/null +++ b/include/uapi/ub/obmm.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ + +#ifndef UAPI_OBMM_H +#define UAPI_OBMM_H + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + + +#define OBMM_MAX_LOCAL_NUMA_NODES 16 +#define MAX_NUMA_DIST 255 +#define OBMM_MAX_PRIV_LEN 512 +#define OBMM_MAX_VENDOR_LEN 128 + + +#define OBMM_EXPORT_FLAG_ALLOW_MMAP 0x1UL +#define OBMM_EXPORT_FLAG_FAST 0x2UL +#define OBMM_EXPORT_FLAG_MASK (OBMM_EXPORT_FLAG_ALLOW_MMAP | OBMM_EXPORT_FLAG_FAST) + +struct obmm_cmd_export_pid { + void *va; + __u64 length; + __u64 flags; + __u64 uba; + __u64 mem_id; + __u32 tokenid; + __s32 pid; + __s32 pxm_numa; + __u16 priv_len; + __u16 vendor_len; + __u8 deid[16]; + __u8 seid[16]; + const void *priv; + const void *vendor_info; +} __attribute__((aligned(8))); + +/* For ordinary register requests, @length and @flags are input arguments while + * @tokenid, @uba and @mem_id are values set by obmm kernel module. For + * register request, @length, @flags, @tokenid and @uba are input to obmm + * kernel module. @mem_id is the only output. 
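/*
 * Illustrative sketch (not part of the patch): one way a driver might use the
 * CIS interface declared in include/linux/firmware/uvb/cis.h above. Only the
 * prototypes are taken from the header; the pairing of call ID, sender ID and
 * receiver ID below is arbitrary example data, not a documented encoding, and
 * the sender follows the "user type | index" pattern used elsewhere in this
 * series.
 */
#include <linux/firmware/uvb/cis.h>

static int example_cis_handler(struct cis_message *msg)
{
	/* consume msg->input / msg->input_size, fill msg->output if needed */
	if (msg->p_output_size)
		*msg->p_output_size = 0;
	return 0;
}

static int example_cis_query(void)
{
	u8 out[64];
	u32 out_size = sizeof(out);
	int ret;
	struct cis_message msg = {
		.input = NULL,
		.input_size = 0,
		.output = out,
		.p_output_size = &out_size,
	};

	/* serve UBIOS_CALL_ID_GET_DEVICE_INFO locally for receiver index 0 */
	ret = register_local_cis_func(UBIOS_CALL_ID_GET_DEVICE_INFO,
				      UBIOS_MY_USER_ID | 0, example_cis_handler);
	if (ret)
		return ret;

	/* synchronous call towards BIOS (example IDs) */
	return cis_call_by_uvb(UBIOS_CALL_ID_GET_DEVICE_INFO,
			       UBIOS_USER_ID_RICH_OS | 0,
			       UBIOS_USER_ID_BIOS | 0,
			       &msg, true);
}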
+ */ +struct obmm_cmd_export { + __u64 size[OBMM_MAX_LOCAL_NUMA_NODES]; + __u64 length; + __u64 flags; + __u64 uba; + __u64 mem_id; + __u32 tokenid; + __s32 pxm_numa; + __u16 priv_len; + __u16 vendor_len; + __u8 deid[16]; + __u8 seid[16]; + const void *vendor_info; + const void *priv; +} __attribute__((aligned(8))); + +#define OBMM_UNEXPORT_FLAG_MASK (0UL) + +struct obmm_cmd_unexport { + __u64 mem_id; + __u64 flags; +} __attribute__((aligned(8))); + +enum obmm_query_key_type { + OBMM_QUERY_BY_PA, + OBMM_QUERY_BY_ID_OFFSET +}; + +struct obmm_cmd_addr_query { + /* key type decides the input and output */ + enum obmm_query_key_type key_type; + __u64 mem_id; + __u64 offset; + __u64 pa; +} __attribute__((aligned(8))); + +#define OBMM_IMPORT_FLAG_ALLOW_MMAP 0x1UL +#define OBMM_IMPORT_FLAG_PREIMPORT 0x2UL +#define OBMM_IMPORT_FLAG_NUMA_REMOTE 0x4UL +#define OBMM_IMPORT_FLAG_MASK (OBMM_IMPORT_FLAG_ALLOW_MMAP | \ + OBMM_IMPORT_FLAG_PREIMPORT | \ + OBMM_IMPORT_FLAG_NUMA_REMOTE) + + +struct obmm_cmd_import { + __u64 flags; + __u64 mem_id; + __u64 addr; + __u64 length; + __u32 tokenid; + __u32 scna; + __u32 dcna; + __s32 numa_id; + __u16 priv_len; + __u8 base_dist; + __u8 deid[16]; + __u8 seid[16]; + const void *priv; +} __attribute__((aligned(8))); + +#define OBMM_UNIMPORT_FLAG_MASK (0UL) + +struct obmm_cmd_unimport { + __u64 mem_id; + __u64 flags; +} __attribute__((aligned(8))); + + +#define OBMM_CMD_EXPORT _IOWR('x', 0, struct obmm_cmd_export) +#define OBMM_CMD_IMPORT _IOWR('x', 1, struct obmm_cmd_import) +#define OBMM_CMD_UNEXPORT _IOW('x', 2, struct obmm_cmd_unexport) +#define OBMM_CMD_UNIMPORT _IOW('x', 3, struct obmm_cmd_unimport) +#define OBMM_CMD_ADDR_QUERY _IOWR('x', 4, struct obmm_cmd_addr_query) +#define OBMM_CMD_EXPORT_PID _IOWR('x', 5, struct obmm_cmd_export_pid) +#define OBMM_CMD_DECLARE_PREIMPORT _IOWR('x', 6, struct obmm_cmd_preimport) +#define OBMM_CMD_UNDECLARE_PREIMPORT _IOW('x', 7, struct obmm_cmd_preimport) + +/* 2bits */ +#define OBMM_SHM_MEM_CACHE_RESV 0x0 +#define OBMM_SHM_MEM_NORMAL 0x1 +#define OBMM_SHM_MEM_NORMAL_NC 0x2 +#define OBMM_SHM_MEM_DEVICE 0x3 +#define OBMM_SHM_MEM_CACHE_MASK 0b11 +/* 2bits */ +#define OBMM_SHM_MEM_READONLY 0x0 +#define OBMM_SHM_MEM_READEXEC 0x4 +#define OBMM_SHM_MEM_READWRITE 0x8 +#define OBMM_SHM_MEM_NO_ACCESS 0xc +#define OBMM_SHM_MEM_ACCESS_MASK 0b1100 + +/* cache maintenance operations (not states) */ +/* no cache maintenance (nops) */ +#define OBMM_SHM_CACHE_NONE 0x0 +/* invalidate only (in-cache modifications may not be written back to DRAM) */ +#define OBMM_SHM_CACHE_INVAL 0x1 +/* write back and invalidate */ +#define OBMM_SHM_CACHE_WB_INVAL 0x2 +/* write back only */ +#define OBMM_SHM_CACHE_WB_ONLY 0x3 +/* Automatically choose the cache maintenance action depending on the memory + * state. The resulting choice always make sure no data would be lost, and might + * be more conservative than necessary. 
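/*
 * Illustrative user-space sketch (not part of the patch): issuing an ordinary
 * export request through the ioctl interface defined above in
 * include/uapi/ub/obmm.h, following the comment on struct obmm_cmd_export
 * (length/flags as inputs, tokenid/uba/mem_id filled in by the module). The
 * device node name "/dev/obmm", the 2 MiB length and the zeroed remaining
 * fields are assumptions made only for this example.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <ub/obmm.h>          /* installed from include/uapi/ub/obmm.h */

int obmm_export_example(void)
{
	struct obmm_cmd_export cmd;
	int fd = open("/dev/obmm", O_RDWR);

	if (fd < 0)
		return -1;

	memset(&cmd, 0, sizeof(cmd));
	cmd.length = 2UL << 20;                    /* 2 MiB */
	cmd.flags = OBMM_EXPORT_FLAG_ALLOW_MMAP;

	if (ioctl(fd, OBMM_CMD_EXPORT, &cmd) == 0)
		printf("exported: mem_id=%llu uba=0x%llx tokenid=%u\n",
		       (unsigned long long)cmd.mem_id,
		       (unsigned long long)cmd.uba, cmd.tokenid);

	close(fd);
	return 0;
}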
+ */ +#define OBMM_SHM_CACHE_INFER 0x4 + +struct obmm_cmd_update_range { + /* address range to manipulate: [start, end) */ + __u64 start; + __u64 end; + __u8 mem_state; + __u8 cache_ops; +} __attribute__((aligned(8))); + +#define OBMM_SHMDEV_UPDATE_RANGE _IOW('X', 0, struct obmm_cmd_update_range) + +struct obmm_cmd_preimport { + __u64 pa; + __u64 length; + __u64 flags; + __u32 scna; + __u32 dcna; + __s32 numa_id; + __u16 priv_len; + __u8 base_dist; + __u8 deid[16]; + __u8 seid[16]; + const void *priv; +} __attribute__((aligned(16), packed)); + +#define OBMM_PREIMPORT_FLAG_MASK (0UL) +#define OBMM_UNPREIMPORT_FLAG_MASK (0UL) + +#define OBMM_MMAP_FLAG_HUGETLB_PMD (1UL << 63) + +#if defined(__cplusplus) +} +#endif + +#endif /* UAPI_OBMM_H */ diff --git a/include/ub/ubus/ub-mem-decoder.h b/include/ub/ubus/ub-mem-decoder.h new file mode 100644 index 0000000000000000000000000000000000000000..56ba2bed34b09a5bbdd0d746fe0119efbacecc74 --- /dev/null +++ b/include/ub/ubus/ub-mem-decoder.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved. + */ + +#ifndef _UB_UBUS_UB_MEM_DECODER_H_ +#define _UB_UBUS_UB_MEM_DECODER_H_ + +#include + +enum ras_err_type { + UB_MEM_ATOMIC_DATA_ERR = 0, + UB_MEM_READ_DATA_ERR, + UB_MEM_FLOW_POISON, + UB_MEM_FLOW_READ_AUTH_POISON, + UB_MEM_FLOW_READ_AUTH_RESPERR, + UB_MEM_TIMEOUT_POISON, + UB_MEM_TIMEOUT_RESPERR, + UB_MEM_READ_DATA_POISON, + UB_MEM_READ_DATA_RESPERR, + MAR_NOPORT_VLD_INT_ERR, + MAR_FLUX_INT_ERR, + MAR_WITHOUT_CXT_ERR, + RSP_BKPRE_OVER_TIMEOUT_ERR, + MAR_NEAR_AUTH_FAIL_ERR, + MAR_FAR_AUTH_FAIL_ERR, + MAR_TIMEOUT_ERR, + MAR_ILLEGAL_ACCESS_ERR, + REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR, +}; + +typedef int (*ubmem_ras_handler)(u64, enum ras_err_type); + +#ifdef CONFIG_UB_UBUS + +/* + * ub_mem_ras_handler_register - register ub memory ras handler for OBMM + * @handler: OBMM ras handler + */ +void ub_mem_ras_handler_register(ubmem_ras_handler handler); + +/* + * ub_mem_ras_handler_unregister - unregister ub memory ras handler for OBMM + */ +void ub_mem_ras_handler_unregister(void); + +/* + * ub_mem_ras_handler_get - get ub memory ras handler + * RETURN VALUE: ubmem_ras_handler + */ +ubmem_ras_handler ub_mem_ras_handler_get(void); + +/* + * ub_mem_drain_start - start ub memory drain + * @scna: source cna + */ +void ub_mem_drain_start(u32 scna); + +/* + * ub_mem_drain_state - whether ub memory drain has been finished + * @scna: source cna + * RETURN VALUE: + * 0 if drain not finish; 1 if drain finish + * other if failed. 
+ */ +int ub_mem_drain_state(u32 scna); + +/* + * ub_mem_get_numa_id - get ubc numa id from scna + * @scna: source cna + * RETURN VALUE: + * numa id + */ +int ub_mem_get_numa_id(u32 scna); + +/* + * ub_memory_validate_pa - Determine whether hpa is valid + * @scna: source cna + * @pa_start: hpa start address + * @pa_end: hpa end address + * @cacheable: cacheable flag + * RETURN VALUE: + * true if hpa is valid + * false if hpa is invalid + */ +bool ub_memory_validate_pa(u32 scna, u64 pa_start, u64 pa_end, bool cacheable); + +#else /* CONFIG_UB_UBUS is not enabled */ +static inline void ub_mem_ras_handler_register(ubmem_ras_handler handler) {} +static inline void ub_mem_ras_handler_unregister(void) {} +static inline ubmem_ras_handler ub_mem_ras_handler_get(void) { return NULL; } +static inline void ub_mem_drain_start(u32 scna) {} +static inline int ub_mem_drain_state(u32 scna) { return -EINVAL; } +static inline int ub_mem_get_numa_id(u32 scna) { return NUMA_NO_NODE; } +static inline bool ub_memory_validate_pa(u32 scna, u64 pa_start, u64 pa_end, + bool cacheable) +{ return false; } +#endif /* CONFIG_UB_UBUS */ + +#endif /* _UB_UBUS_UB_MEM_DECODER_H_ */ diff --git a/include/ub/ubus/ubus.h b/include/ub/ubus/ubus.h index 13f3b6b2ce3bdbf1ea1f5a0fc2d26a864c9db36a..a81d652a18ff6f84cf092f2a3d8cb9db9d22a627 100644 --- a/include/ub/ubus/ubus.h +++ b/include/ub/ubus/ubus.h @@ -422,6 +422,9 @@ struct ub_bus_controller { struct ub_bus_instance *bi; struct ub_bus_instance *cluster_bi; + /* ub memory decoder */ + struct ub_mem_device *mem_device; + void *data; struct dentry *debug_root;
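/*
 * Illustrative kernel-side sketch (not part of the patch): how a consumer
 * such as the OBMM module might hook the UB memory RAS path exported through
 * include/ub/ubus/ub-mem-decoder.h above. The scna value, the probe address
 * range and the drain polling interval are example choices; error handling is
 * reduced to the minimum, and a negative return from ub_mem_drain_state()
 * (failure) is simply treated as "stop polling".
 */
#include <linux/delay.h>
#include <linux/module.h>
#include <ub/ubus/ub-mem-decoder.h>

static int example_ubmem_ras_handler(u64 hpa, enum ras_err_type type)
{
	pr_err("ub mem RAS error: type=%d hpa=0x%llx\n", type, hpa);
	/* e.g. mark the backing page bad or notify user space here */
	return 0;
}

static int example_ubmem_drain(u32 scna)
{
	int retries = 100;

	/* hypothetical address range, purely to show the validate call */
	if (!ub_memory_validate_pa(scna, 0x200000000ULL, 0x200001000ULL, true))
		pr_warn("example range is not backed by cna %u\n", scna);

	ub_mem_drain_start(scna);
	while (ub_mem_drain_state(scna) == 0 && retries--)
		msleep(10);                /* 0 means drain still in progress */

	return retries < 0 ? -ETIMEDOUT : 0;
}

static int __init example_ubmem_init(void)
{
	ub_mem_ras_handler_register(example_ubmem_ras_handler);
	return 0;
}

static void __exit example_ubmem_exit(void)
{
	ub_mem_ras_handler_unregister();
}

module_init(example_ubmem_init);
module_exit(example_ubmem_exit);
MODULE_LICENSE("GPL");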