From 78a3e1113b07c3d4ee03978e49a909b684477cc4 Mon Sep 17 00:00:00 2001
From: klmengkd
Date: Fri, 31 Oct 2025 15:06:18 +0800
Subject: [PATCH 01/48] ubios_uvb: support UBIOS object description
 specification parsing

commit 24fa96b6e010d05e59d99562a415b1a64544e292 openEuler

This patch supports getting uvb and cis information by parsing the
UBIOS object description specification.

Signed-off-by: Anonymous_Z
Signed-off-by: klmengkd
---
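For reference, a minimal sketch of how a consumer could walk the parsed
CIS data once this module has populated g_cis_info. dump_cis_groups() is
a hypothetical helper written for this note, not part of the patch:

    #include <linux/kernel.h>
    #include "cis_uvb_interface.h"

    /* Illustration only: print every call id of every parsed group. */
    static void dump_cis_groups(void)
    {
        u32 i, j;

        if (!g_cis_info)
            return;
        for (i = 0; i < g_cis_info->group_count; i++) {
            struct cis_group *g = g_cis_info->groups[i];

            pr_info("group %u: owner %#x, usage %u, %u call ids\n",
                i, g->owner_user_id, g->usage, g->cis_count);
            for (j = 0; j < g->cis_count; j++)
                pr_info("  call_id[%u] = %#x\n", j, g->call_id[j]);
        }
    }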
 drivers/Kconfig                               |   1 +
 drivers/Makefile                              |   1 +
 drivers/ubios_uvb/Kconfig                     |  31 +
 drivers/ubios_uvb/Makefile                    |   6 +
 drivers/ubios_uvb/include/cis_uvb_interface.h | 116 +++
 drivers/ubios_uvb/include/odf_interface.h     |  94 +++
 drivers/ubios_uvb/odf/Makefile                |  10 +
 drivers/ubios_uvb/odf/include/libodf.h        |  38 +
 drivers/ubios_uvb/odf/include/libodf_handle.h | 182 +++++
 drivers/ubios_uvb/odf/include/odf_trans.h     |  34 +
 drivers/ubios_uvb/odf/odf_data.c              | 725 ++++++++++++++++++
 drivers/ubios_uvb/odf/odf_file.c              |  76 ++
 drivers/ubios_uvb/odf/odf_helper.c            | 217 ++++++
 drivers/ubios_uvb/odf/odf_trans.c             | 487 ++++++++++++
 14 files changed, 2018 insertions(+)
 create mode 100644 drivers/ubios_uvb/Kconfig
 create mode 100644 drivers/ubios_uvb/Makefile
 create mode 100644 drivers/ubios_uvb/include/cis_uvb_interface.h
 create mode 100644 drivers/ubios_uvb/include/odf_interface.h
 create mode 100644 drivers/ubios_uvb/odf/Makefile
 create mode 100644 drivers/ubios_uvb/odf/include/libodf.h
 create mode 100644 drivers/ubios_uvb/odf/include/libodf_handle.h
 create mode 100644 drivers/ubios_uvb/odf/include/odf_trans.h
 create mode 100644 drivers/ubios_uvb/odf/odf_data.c
 create mode 100644 drivers/ubios_uvb/odf/odf_file.c
 create mode 100644 drivers/ubios_uvb/odf/odf_helper.c
 create mode 100644 drivers/ubios_uvb/odf/odf_trans.c

diff --git a/drivers/Kconfig b/drivers/Kconfig
index bfb2bdb00477..10fa9c700a9d 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -248,4 +248,5 @@
 source "drivers/cpuinspect/Kconfig"
 
 source "drivers/thirdparty/Kconfig"
+source "drivers/ubios_uvb/Kconfig"
 endmenu

diff --git a/drivers/Makefile b/drivers/Makefile
index 800793aafbbb..01a48436281d 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -201,3 +201,4 @@
 obj-$(CONFIG_DRM_ACCEL) += accel/
 obj-$(CONFIG_CDX_BUS) += cdx/
 obj-$(CONFIG_S390) += s390/
+obj-$(CONFIG_UDFI) += ubios_uvb/

diff --git a/drivers/ubios_uvb/Kconfig b/drivers/ubios_uvb/Kconfig
new file mode 100644
index 000000000000..97a69aaa686f
--- /dev/null
+++ b/drivers/ubios_uvb/Kconfig
@@ -0,0 +1,31 @@
+config UDFI
+    bool "UDFI Drivers"
+    depends on ARM64
+    default n
+    help
+      UBIOS Distributed Firmware Interface (UDFI) support for the kernel
+      requires a UBIOS platform. UDFI provides communication channels
+      among the OS, BIOS and other firmware:
+      (1) Call ID Service (CIS), by which the OS sends a call to BIOS; it
+      can be used to operate specific hardware, read/write BIOS information
+      or call BIOS functions, e.g., read/write RTC, modify boot options, etc.
+      (2) Notify ID Information (NII), by which the OS receives notifications
+      from BIOS. This is useful when events are detected and BIOS needs to
+      inform the OS, e.g., RAS events.
+
+if UDFI
+
+config UDFI_CIS
+    tristate "CIS framework"
+    select UDFI_ODF
+    default n
+    help
+      Select this option if the CIS framework is needed.
+
+config UDFI_ODF
+    tristate "odf parse"
+    default n
+    help
+      This driver supports parsing of the UBIOS object description specification.
+
+endif # UDFI

diff --git a/drivers/ubios_uvb/Makefile b/drivers/ubios_uvb/Makefile
new file mode 100644
index 000000000000..5d710f105572
--- /dev/null
+++ b/drivers/ubios_uvb/Makefile
@@ -0,0 +1,6 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+# Create : 2025-04-18
+# Description : cis odf Makefile
+
+obj-$(CONFIG_UDFI) += odf/
+obj-$(CONFIG_UDFI) += cis/

diff --git a/drivers/ubios_uvb/include/cis_uvb_interface.h b/drivers/ubios_uvb/include/cis_uvb_interface.h
new file mode 100644
index 000000000000..c476537eb51e
--- /dev/null
+++ b/drivers/ubios_uvb/include/cis_uvb_interface.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * Description: cis uvb interface header
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+
+#ifndef CIS_UVB_INTERFACE_H
+#define CIS_UVB_INTERFACE_H
+#include <linux/types.h>
+#include "odf_interface.h"
+
+#define LOG_PRE "[UVB]:"
+#define ERR_PRE "[UVB]ERR:"
+/**
+ * struct cis_group - call id service group
+ * @owner_user_id: user id that indicates which component owns the call_id[] array
+ * @cis_count: number of call ids in the group
+ * @usage: usage for channel
+ * @index: index for uvb
+ * @forwarder_id: forwarder id
+ * @call_id: array of call ids
+ */
+struct cis_group {
+    u32 owner_user_id;
+    u32 cis_count;
+    u8 usage;
+    u8 index;
+    u32 forwarder_id;
+    u32 call_id[];
+};
+
+/**
+ * struct cis_ub - call id service ub struct
+ * @usage: usage for channel
+ * @index: index for uvb
+ * @forwarder_id: forwarder id
+ */
+struct cis_ub {
+    u8 usage;
+    u8 index;
+    u32 forwarder_id;
+};
+
+/**
+ * struct cis_info - call id service information
+ * @group_count: number of cis groups
+ * @ub: ub channel description
+ * @groups: array of cis groups
+ */
+struct cis_info {
+    u32 group_count;
+    u32 reserved;
+    struct cis_ub ub;
+    struct cis_group *groups[];
+};
+
+
+extern struct cis_info *g_cis_info;
+
+#define UVB_OUTPUT_SIZE_NULL 0xFFFFFFFF
+#define UVB_WINDOW_COUNT_MAX 0xFF
+
+/**
+ * struct uvb_window
+ * @version: uvb window version
+ * @message_id: call id
+ * @sender_id: user id of caller
+ * @receiver_id: user id of callee
+ * @input_data_address: input data physical address
+ * @input_data_size: input data size
+ * @input_data_checksum: input data checksum, not used yet
+ * @output_data_address: output data physical address
+ * @output_data_size: output data size
+ * @output_data_checksum: output data checksum, not used yet
+ * @returned_status: returned status of the call
+ */
+struct uvb_window {
+    u8 version;
+    u8 reserved1[3];
+    u32 message_id;
+    u32 sender_id;
+    u32 receiver_id;
+    u64 input_data_address;
+    u32 input_data_size;
+    u32 input_data_checksum;
+    u64 output_data_address;
+    u32 output_data_size;
+    u32 output_data_checksum;
+    u32 returned_status;
+    u8 reserved2[8];
+    u32 forwarder_id;
+};
+struct uvb_window_description {
+    u64 obtain; /* This address is used to obtain this window */
+    u64 address; /* The address of uvb window */
+    u64 buffer; /* Buffer address of this window, 0 if no buffer */
+    u32 size; /* The size of buffer, same for all windows in one uvb */
+    u32 reserved;
+};
+
+struct uvb {
+    u8 window_count;
+    bool secure;
+    u16 delay; /* us */
+    u32 reserved;
+    struct uvb_window_description wd[];
+};
+
+struct uvb_info {
+    u8 uvb_count;
+    u8 reserved[7];
+    struct uvb *uvbs[];
+};
+
+extern struct uvb_info *g_uvb_info;
+
+#endif

diff --git a/drivers/ubios_uvb/include/odf_interface.h
b/drivers/ubios_uvb/include/odf_interface.h
new file mode 100644
index 000000000000..2ca2f591b04e
--- /dev/null
+++ b/drivers/ubios_uvb/include/odf_interface.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * Description: odf interface header
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+#ifndef ODF_INTERFACE_H
+#define ODF_INTERFACE_H
+#include <linux/acpi.h>
+
+/* UBRT table info */
+#define ACPI_SIG_UBRT "UBRT"
+#define UBRT_UB_CONTROLLER 0
+#define UBRT_UMMU 1
+#define UBRT_UB_MEMORY 2
+#define UBRT_VIRTUAL_BUS 3
+#define UBRT_CALL_ID_SERVICE 4
+
+struct ubios_od_value_struct {
+    char *name;
+    u8 type;
+    u32 data_length;
+    void *data;
+};
+
+struct ubios_od_header {
+    char name[16];
+    u32 total_size;
+    u8 version;
+    u8 reserved[3];
+    u32 remaining_size;
+    u32 checksum;
+};
+
+/*
+Data structure of the UBIOS OD Root Table is shown below:
+|----ubios_od_root----|
+| Header          |
+| count           |
+| reserved        |
+| odfs[0]         | if not 0 --point to--> a od file
+| odfs[...]       | if not 0 --point to--> a od file
+| odfs[count - 1] | if not 0 --point to--> a od file
+*/
+struct ubios_od_root {
+    struct ubios_od_header header;
+    u16 count;
+    u8 reserved[6];
+    u64 odfs[];
+};
+
+struct ubios_od_table_info {
+    char *table_name;
+    u16 row;
+    u8 col;
+    char *sub_name_start;
+    void *value_start;
+    void *table_end;
+    u32 length_per_row;
+};
+
+struct ubios_od_list_info {
+    char *name;
+    u8 data_type; /* not include list type */
+    u16 count; /* value count in the list */
+    void *start; /* pointer to the first value in the list */
+    void *end; /* end of list, not include */
+};
+
+/**
+ * struct ubrt_sub_tables - UBRT sub tables
+ * @type: type of tables
+ * @pointer: address of tables
+ */
+struct ubrt_sub_tables {
+    u8 type;
+    u8 reserved[7];
+    u64 pointer;
+};
+
+/**
+ * struct ubios_ubrt_table - UBRT info
+ * @count: count of tables
+ * @sub_tables: sub tables, @count entries
+ */
+struct ubios_ubrt_table {
+    struct acpi_table_header header;
+    u32 count;
+    struct ubrt_sub_tables sub_tables[];
+};
+
+#endif

diff --git a/drivers/ubios_uvb/odf/Makefile b/drivers/ubios_uvb/odf/Makefile
new file mode 100644
index 000000000000..3c76e02dee82
--- /dev/null
+++ b/drivers/ubios_uvb/odf/Makefile
@@ -0,0 +1,10 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+# Create : 2025-04-18
+# Description : odf Makefile
+
+obj-y = odf_get_fdt.o
+
+obj-$(CONFIG_UDFI_ODF) += odf.o
+odf-objs := odf_trans.o odf_data.o odf_file.o odf_helper.o
+
+ccflags-y += -I$(srctree)/$(src)/../include
diff --git a/drivers/ubios_uvb/odf/include/libodf.h b/drivers/ubios_uvb/odf/include/libodf.h
new file mode 100644
index 000000000000..74e9b98bf878
--- /dev/null
+++ b/drivers/ubios_uvb/odf/include/libodf.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * Description: libodf header
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+#ifndef LIBODF_H
+#define LIBODF_H
+#include "cis_uvb_interface.h"
+#include "libodf_handle.h"
+
+#define UBIOS_OD_NAME_LEN_MAX 16
+#define UBIOS_OD_VERSION 1
+#define UBIOS_OD_EMPTY 0
+
+#define UBIOS_OD_TYPE_U8 0x1
+#define UBIOS_OD_TYPE_U16 0x2
+#define UBIOS_OD_TYPE_U32 0x3
+#define UBIOS_OD_TYPE_U64 0x4
+#define UBIOS_OD_TYPE_S8 0x5
+#define UBIOS_OD_TYPE_S16 0x6
+#define UBIOS_OD_TYPE_S32 0x7
+#define UBIOS_OD_TYPE_S64 0x8
+#define UBIOS_OD_TYPE_BOOL 0x10
+#define UBIOS_OD_TYPE_CHAR 0x20
+#define UBIOS_OD_TYPE_STRING 0x21
+#define UBIOS_OD_TYPE_STRUCT 0x30
+#define UBIOS_OD_TYPE_TABLE 0x40
+#define UBIOS_OD_TYPE_FILE 0x50
+#define UBIOS_OD_TYPE_LIST 0x80
+
+#define UBIOS_OD_ROOT_NAME "root_table"
+
+#define UBIOS_OD_INVALID_INDEX 0xFFFF
+
+#define UBIOS_OD_PATH_SEPARATOR '/'
+#endif
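Note that UBIOS_OD_TYPE_LIST is a flag bit OR'ed onto the element type
rather than a standalone type, which is why the parser always tests it
with a mask. A minimal sketch of the intended decoding (od_type_is_list()
and od_list_elem_type() are illustrative helpers, not part of this header):

    #include <linux/types.h>

    #define UBIOS_OD_TYPE_U32  0x3
    #define UBIOS_OD_TYPE_LIST 0x80

    /* True when the type byte carries the list flag. */
    static inline bool od_type_is_list(u8 type)
    {
        return (type & UBIOS_OD_TYPE_LIST) == UBIOS_OD_TYPE_LIST;
    }

    /* Element type of a list, e.g. 0x83 -> UBIOS_OD_TYPE_U32. */
    static inline u8 od_list_elem_type(u8 type)
    {
        return type & ~UBIOS_OD_TYPE_LIST;
    }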
diff --git a/drivers/ubios_uvb/odf/include/libodf_handle.h b/drivers/ubios_uvb/odf/include/libodf_handle.h
new file mode 100644
index 000000000000..e50d26d3afd3
--- /dev/null
+++ b/drivers/ubios_uvb/odf/include/libodf_handle.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * Description: libodf handle header
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+#ifndef LIBODF_HANDLE_H
+#define LIBODF_HANDLE_H
+#include <linux/types.h>
+
+extern struct ubios_od_root *od_root;
+
+/**
+@brief Search and match one value name, return the pointer of the value if matched.
+@param[in] start start address of the search.
+@param[in] end end address of the search.
+@param[in] name value name.
+@param[out] vs used to return value structure.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+@note:
+    start must point to the name of a value.
+*/
+int odf_get_vs_by_name(u8 *start, u8 *end, char *name, struct ubios_od_value_struct *vs);
+
+/**
+@brief Get table information like row, column, sub types, etc.
+@param[in] vs value structure
+@param[out] table_info used to return table info.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_vs_to_table(struct ubios_od_value_struct *vs, struct ubios_od_table_info *table_info);
+
+/**
+@brief Get a value's offset in a row of a table, will check type first.
+@param[in] table table info obtained from odf_vs_to_table()
+@param[in] name name of the data wanted from the table.
+@param[in] type data type.
+@param[out] offset used to return the offset in the row.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_get_offset_in_table(const struct ubios_od_table_info *table,
+        char *name, u8 type, u32 *offset);
+
+/**
+@brief Get a value from a table according to name and row, will check type first.
+@param[in] table table info obtained from odf_vs_to_table()
+@param[in] row the row of the table wanted.
+@param[in] name name of the data wanted from the table.
+@param[in] type data type.
+@param[out] value data pointer to store the returned value.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
*/
+int odf_get_data_from_table(const struct ubios_od_table_info *table,
+        u16 row, char *name, u8 type, void *value);
+
+/**
+@brief Get a value from a table according to name and row, will check type first.
+@param[in] table table info obtained from odf_vs_to_table()
+@param[in] row the row of the table wanted.
+@param[in] name name of the data wanted from the table.
+@param[out] value used to return the value.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_get_u8_from_table(const struct ubios_od_table_info *table,
+        u16 row, char *name, u8 *value);
+int odf_get_u32_from_table(const struct ubios_od_table_info *table,
+        u16 row, char *name, u32 *value);
+int odf_get_u64_from_table(const struct ubios_od_table_info *table,
+        u16 row, char *name, u64 *value);
+
+/**
+@brief Get a list from the od root, will return a list info structure.
+@param[in] root root pointer of od
+@param[in] path full path to search, optionally including a list index.
+@param[out] list used to return a list info structure.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_get_list(struct ubios_od_root *root, char *path, struct ubios_od_list_info *list);
+
+int odf_get_struct(struct ubios_od_root *root, char *path, struct ubios_od_value_struct *vs);
+
+int odf_get_u32_from_list(const struct ubios_od_list_info *list, u16 index, u32 *value);
+
+/**
+@brief Get a value structure from a list by index.
+@param[in] list list obtained from odf_get_list()
+@param[in] index index in the list to get.
+@param[out] vs used to return a value structure
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+@note:
+    This function is mainly useful when the data type in the list is struct:
+    get the value structure, then use odf_get_vs_by_name() to search inside.
+*/
+int odf_get_data_from_list(const struct ubios_od_list_info *list,
+        u16 index, struct ubios_od_value_struct *vs);
+
+/**
+@brief Get the next structure of a list.
+@param[in] list list pointer which this data belongs to.
+@param[in,out] vs current structure as input, next structure as output.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+@note:
+    The caller must ensure the input structure is a member of the list;
+    this function can't check this.
+*/
+int odf_next_in_list(const struct ubios_od_list_info *list, struct ubios_od_value_struct *vs);
+
+/**
+@brief Get a value from a struct according to name, will check type first.
+@param[in] vs standard structure of a struct
+@param[in] name name of the data wanted.
+@param[out] value used to return the value.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_get_u8_from_struct(const struct ubios_od_value_struct *vs, char *name, u8 *value);
+int odf_get_u16_from_struct(const struct ubios_od_value_struct *vs, char *name, u16 *value);
+int odf_get_u32_from_struct(const struct ubios_od_value_struct *vs, char *name, u32 *value);
+int odf_get_bool_from_struct(const struct ubios_od_value_struct *vs, char *name, bool *value);
+int odf_get_table_from_struct(const struct ubios_od_value_struct *vs,
+        char *name, struct ubios_od_table_info *table);
+int odf_get_list_from_struct(const struct ubios_od_value_struct *vs,
+        char *name, struct ubios_od_list_info *list);
+int odf_get_list_from_table(u8 *table, char *path, struct ubios_od_list_info *list);
+int odf_get_vs_from_table(u8 *table, char *path, struct ubios_od_value_struct *vs);
+/**
+@brief Check the od root's name and checksum, return whether it is valid.
+@param[in] root start of od root
+@return
+@retval = true, it is valid.
+@retval = false, it is invalid.
+*/
+bool is_od_root_valid(struct ubios_od_root *root);
+
+/**
+@brief Check the od file's checksum, return whether it is valid.
+@param[in] file start of od file
+@return
+@retval = true, it is valid.
+@retval = false, it is invalid.
+*/
+bool is_od_file_valid(u8 *file);
+
+/**
+@brief Search all pointers in the od root, return the od file matching the input name.
+@param[in] root start of od root
+@param[in] name name of od
+@return
+@retval = NULL, not found.
+@retval != NULL, found.
+*/
+u8 *odf_get_od_file(struct ubios_od_root *root, char *name);
+
+u8 odf_read8(u8 *address);
+u16 odf_read16(u8 *address);
+u32 odf_read32(u8 *address);
+u64 odf_read64(u8 *address);
+
+u32 odf_checksum(u8 *data, u32 size);
+bool odf_is_checksum_ok(struct ubios_od_header *header);
+void odf_update_checksum(struct ubios_od_header *header);
+int odf_separate_name(char **path, char *name, u64 max_len, u16 *index);
+void odf_get_vs_by_pointer(u8 *data, struct ubios_od_value_struct *vs);
+
+#endif
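Taken together, these accessors are meant to be used in a get-first /
get-next pattern. A hedged sketch of a caller (assuming an OD file named
"virtual_bus" with a list "uvb" of struct elements carrying a u16 "delay"
member, as used later in this series; walk_uvb_list() is illustrative):

    /* Iterate a list of structs and read a u16 member from each. */
    static int walk_uvb_list(void)
    {
        struct ubios_od_list_info list;
        struct ubios_od_value_struct vs;
        u16 delay;
        u16 i;
        int ret;

        ret = odf_get_list(od_root, "virtual_bus/uvb", &list);
        if (ret)
            return ret;

        ret = odf_get_data_from_list(&list, 0, &vs);
        if (ret)
            return ret;

        for (i = 0; i < list.count; i++) {
            if (!odf_get_u16_from_struct(&vs, "delay", &delay))
                pr_info("uvb[%u] delay=%u us\n", i, delay);
            (void)odf_next_in_list(&list, &vs);
        }
        return 0;
    }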
diff --git a/drivers/ubios_uvb/odf/include/odf_trans.h b/drivers/ubios_uvb/odf/include/odf_trans.h
new file mode 100644
index 000000000000..1d03979ee8f5
--- /dev/null
+++ b/drivers/ubios_uvb/odf/include/odf_trans.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * Description: odf trans header
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+
+#ifndef ODF_TRANS_H
+#define ODF_TRANS_H
+#include "libodf.h"
+
+#define ODF_FILE_NAME_CALL_ID_SERVICE "call_id_service"
+#define ODF_NAME_CIS_GROUP "group"
+#define ODF_NAME_CIS_UB "ub"
+#define ODF_NAME_CIS_OWNER "owner"
+#define ODF_NAME_CIS_CIA "cia"
+#define ODF_NAME_CIS_CALL_ID "call_id"
+#define ODF_NAME_CIS_USAGE "usage"
+#define ODF_NAME_CIS_INDEX "index"
+#define ODF_NAME_CIS_FORWARDER_ID "forwarder"
+
+/* odf processing */
+#define ODF_FILE_NAME_VIRTUAL_BUS "virtual_bus"
+#define ODF_NAME_UVB "uvb"
+#define ODF_NAME_SECURE "secure"
+#define ODF_NAME_DELAY "delay"
+#define ODF_NAME_WD "wd"
+#define ODF_NAME_OBTAIN "obtain"
+#define ODF_NAME_ADDRESS "address"
+#define ODF_NAME_BUFFER "buffer"
+#define ODF_NAME_SIZE "size"
+
+#endif

diff --git a/drivers/ubios_uvb/odf/odf_data.c b/drivers/ubios_uvb/odf/odf_data.c
new file mode 100644
index 000000000000..3dee113b8e29
--- /dev/null
+++ b/drivers/ubios_uvb/odf/odf_data.c
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * Description: ODF data processing, handles the various ODF data structures
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+#include <linux/kernel.h>
+#include "include/libodf.h"
+#include "include/libodf_handle.h"
+
+/**
+@brief Search and match one value name, return the pointer of the value structure if matched.
+@param[in] start start address of the search.
+@param[in] end end address of the search.
+@param[in] name value name.
+@param[out] vs used to return value structure.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/ +int odf_get_vs_by_name(u8 *start, u8 *end, char *name, struct ubios_od_value_struct *vs) +{ + struct ubios_od_value_struct temp; + + if (!start || !end || !name || !vs) + return -EINVAL; + + if (start >= end) + return -ENOENT; + + odf_get_vs_by_pointer(start, &temp); + + if (strcmp(name, temp.name) == 0) { + *vs = temp; + return 0; + } + + return odf_get_vs_by_name(temp.data + temp.data_length, end, name, vs); +} + +static void odf_vs_to_list(struct ubios_od_value_struct *vs, struct ubios_od_list_info *list) +{ + list->name = vs->name; + list->data_type = vs->type & ~UBIOS_OD_TYPE_LIST; + list->count = odf_read16(vs->data); + list->start = vs->data + sizeof(u16); + list->end = vs->data + vs->data_length; +} + +/** +Change value structure by index in a list, the name will not be changed, +Both change value pointer and length and type. +note: + index could be 0, that means get the first one in list. +*/ +static int odf_change_vs_in_list(struct ubios_od_value_struct *vs, u16 index) +{ + struct ubios_od_list_info list; + + odf_vs_to_list(vs, &list); + + return odf_get_data_from_list(&list, index, vs); +} + +/** +Change the value structure with index, move the pointer to the data indicated by index, +and update length. +Note: +Only list support index in path, other type will return not support if index != 0. +*/ +static int odf_change_vs_by_index(struct ubios_od_value_struct *vs, u16 index) +{ + if ((vs->type & UBIOS_OD_TYPE_LIST) == UBIOS_OD_TYPE_LIST) + return odf_change_vs_in_list(vs, index); + + if (index > 0) + return -EOPNOTSUPP; + else + return 0; +} + +/** +Search one od file, input value path, output the value structure, contains value info +*/ +static int odf_get_vs_from_file(u8 *file, char *path, struct ubios_od_value_struct *vs) +{ + int status; + u16 index; + char name[UBIOS_OD_NAME_LEN_MAX]; + struct ubios_od_header *header = (struct ubios_od_header *)file; + bool is_got_vs = false; + + if (!is_od_file_valid(file)) { + pr_err(ERR_PRE "odf: file[%llx] invalid\n", (u64)file); + return -EINVAL; + } + + /* start from the od file data */ + vs->data = (u8 *)(header + 1); + vs->data_length = header->total_size - header->remaining_size - + sizeof(struct ubios_od_header); + while (odf_separate_name(&path, name, UBIOS_OD_NAME_LEN_MAX, &index) == 0) { + status = odf_get_vs_by_name(vs->data, vs->data + vs->data_length, name, vs); + if (status) { + pr_err(ERR_PRE "odf: can not find name[%s]'s value\n", name); + return status; + } + is_got_vs = true; + if (index != UBIOS_OD_INVALID_INDEX) { + status = odf_change_vs_by_index(vs, index); + if (status) { + pr_err(ERR_PRE "odf: get value by index failed, name[%s], type[%#x], index[%#x]\n", + name, vs->type, index); + return status; + } + } + } + if ((is_got_vs) && !path) + return 0; + + pr_err(ERR_PRE "odf: failed, left path[%s]\n", path); + + return -EOPNOTSUPP; +} + +/** +Search all od file in the root, input value path, output the value structure, contains value info. +If file is not NULL, also return od file, could used to update info of od file, such as checksum. 
*/
+static int odf_get_vs_from_root(struct ubios_od_root *root, char *path,
+        u8 **file, struct ubios_od_value_struct *vs)
+{
+    int status;
+    char name[UBIOS_OD_NAME_LEN_MAX];
+    u8 *od_file = NULL;
+
+    status = odf_separate_name(&path, name, UBIOS_OD_NAME_LEN_MAX, NULL);
+    if (status) {
+        pr_err(ERR_PRE "odf: get od file name failed, %d\n", status);
+        return status;
+    }
+
+    od_file = odf_get_od_file(root, name);
+    if (!od_file) {
+        pr_err(ERR_PRE "odf: can not find od file[%s]\n", name);
+        return -ENOENT;
+    }
+
+    if (file)
+        *file = od_file;
+
+    return odf_get_vs_from_file(od_file, path, vs);
+}
+
+static bool is_root_and_path_valid(struct ubios_od_root *root, char *path)
+{
+    if (!is_od_root_valid(root))
+        return false;
+
+    if (!path) {
+        pr_err(ERR_PRE "odf: path is NULL\n");
+        return false;
+    }
+
+    return true;
+}
+
+/**
+@brief Get table information like row, column, sub types, etc.
+@param[in] vs value structure
+@param[out] table_info used to return table info.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_vs_to_table(struct ubios_od_value_struct *vs, struct ubios_od_table_info *table_info)
+{
+    u64 i;
+    u8 type;
+    u8 *p = vs->data;
+
+    table_info->table_name = vs->name;
+    table_info->length_per_row = 0;
+    table_info->row = odf_read16(p);
+    p += sizeof(u16);
+    table_info->col = odf_read8(p);
+    p += sizeof(u8);
+    table_info->sub_name_start = (char *)p;
+
+    for (i = 0; i < table_info->col; i++) {
+        p += strlen((char *)p) + 1;
+        type = odf_read8(p);
+        p++;
+        switch (type) {
+        case UBIOS_OD_TYPE_U8:
+        case UBIOS_OD_TYPE_S8:
+        case UBIOS_OD_TYPE_BOOL:
+        case UBIOS_OD_TYPE_CHAR:
+            table_info->length_per_row += sizeof(u8);
+            break;
+        case UBIOS_OD_TYPE_U16:
+        case UBIOS_OD_TYPE_S16:
+            table_info->length_per_row += sizeof(u16);
+            break;
+        case UBIOS_OD_TYPE_U32:
+        case UBIOS_OD_TYPE_S32:
+            table_info->length_per_row += sizeof(u32);
+            break;
+        case UBIOS_OD_TYPE_U64:
+        case UBIOS_OD_TYPE_S64:
+            table_info->length_per_row += sizeof(u64);
+            break;
+        default:
+            pr_err(ERR_PRE "odf: get table[%s] info, invalid type[%d] of column[%llu]\n",
+                table_info->table_name, type, i);
+            return -EOPNOTSUPP;
+        }
+    }
+    table_info->value_start = p;
+    table_info->table_end = table_info->value_start +
+        table_info->length_per_row * table_info->row;
+
+    return 0;
+}
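+
+/*
+ * Table wire format consumed by odf_vs_to_table() above -- an illustrative
+ * example (assumed values), with two u32 columns "addr" and "size" and one
+ * row:
+ *
+ *   u16 row = 1
+ *   u8  col = 2
+ *   "addr\0" 0x03  "size\0" 0x03      column names and type tags
+ *   u32 addr, u32 size                row 0; length_per_row = 8
+ */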
+
+/**
+@brief Get a value's offset in a row of a table, will check type first.
+@param[in] table table info obtained from odf_vs_to_table()
+@param[in] name name of the data wanted from the table.
+@param[in] type data type.
+@param[out] offset used to return the offset in the row.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_get_offset_in_table(const struct ubios_od_table_info *table,
+        char *name, u8 type, u32 *offset)
+{
+    u64 i;
+    u8 data_type;
+    u32 temp_offset = 0;
+    char *sub_name = NULL;
+
+    if (!table || !name || !offset)
+        return -EINVAL;
+
+    /* first sub name */
+    sub_name = table->sub_name_start;
+    for (i = 0; i < table->col; i++) {
+        data_type = odf_read8((u8 *)sub_name + strlen(sub_name) + 1);
+        if (strcmp(name, sub_name) == 0)
+            break;
+        sub_name += strlen(sub_name) + 1 + sizeof(data_type);
+        switch (data_type) {
+        case UBIOS_OD_TYPE_U8:
+        case UBIOS_OD_TYPE_S8:
+        case UBIOS_OD_TYPE_BOOL:
+        case UBIOS_OD_TYPE_CHAR:
+            temp_offset += sizeof(u8);
+            break;
+        case UBIOS_OD_TYPE_U16:
+        case UBIOS_OD_TYPE_S16:
+            temp_offset += sizeof(u16);
+            break;
+        case UBIOS_OD_TYPE_U32:
+        case UBIOS_OD_TYPE_S32:
+            temp_offset += sizeof(u32);
+            break;
+        case UBIOS_OD_TYPE_U64:
+        case UBIOS_OD_TYPE_S64:
+            temp_offset += sizeof(u64);
+            break;
+        default:
+            pr_err(ERR_PRE "odf: get table info, invalid type[%d] of column[%llu]\n",
+                data_type, i);
+            return -EOPNOTSUPP;
+        }
+    }
+    if (i == table->col)
+        return -ENOENT;
+
+    if (type != data_type)
+        return -EFAULT;
+
+    *offset = temp_offset;
+
+    return 0;
+}
+
+/**
+@brief Get a value from a table according to name and row, will check type first.
+@param[in] table table info obtained from odf_vs_to_table()
+@param[in] row the row of the table wanted.
+@param[in] name name of the data wanted from the table.
+@param[in] type data type.
+@param[out] value used to return the value.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_get_data_from_table(const struct ubios_od_table_info *table,
+        u16 row, char *name, u8 type, void *value)
+{
+    int status;
+    u32 offset;
+    u8 *p;
+
+    if (!table || !name || !value)
+        return -EINVAL;
+
+    if (row >= table->row)
+        return -EOVERFLOW;
+
+    status = odf_get_offset_in_table(table, name, type, &offset);
+    if (status)
+        return status;
+
+    p = table->value_start + table->length_per_row * row + offset;
+    switch (type) {
+    case UBIOS_OD_TYPE_U8:
+    case UBIOS_OD_TYPE_BOOL:
+    case UBIOS_OD_TYPE_CHAR:
+        *(u8 *)value = odf_read8(p);
+        break;
+    case UBIOS_OD_TYPE_S8:
+        *(s8 *)value = (s8)odf_read8(p);
+        break;
+    case UBIOS_OD_TYPE_U16:
+        *(u16 *)value = odf_read16(p);
+        break;
+    case UBIOS_OD_TYPE_S16:
+        *(s16 *)value = (s16)odf_read16(p);
+        break;
+    case UBIOS_OD_TYPE_U32:
+        *(u32 *)value = odf_read32(p);
+        break;
+    case UBIOS_OD_TYPE_S32:
+        *(s32 *)value = (s32)odf_read32(p);
+        break;
+    case UBIOS_OD_TYPE_U64:
+        *(u64 *)value = odf_read64(p);
+        break;
+    case UBIOS_OD_TYPE_S64:
+        *(s64 *)value = (s64)odf_read64(p);
+        break;
+    default:
+        pr_err(ERR_PRE "odf: get table data failed, invalid type[%#x]\n", type);
+        return -EOPNOTSUPP;
+    }
+
+    return status;
+}
+
+int odf_get_u8_from_table(const struct ubios_od_table_info *table,
+        u16 row, char *name, u8 *value)
+{
+    return odf_get_data_from_table(table, row, name, UBIOS_OD_TYPE_U8, value);
+}
+
+int odf_get_u32_from_table(const struct ubios_od_table_info *table,
+        u16 row, char *name, u32 *value)
+{
+    return odf_get_data_from_table(table, row, name, UBIOS_OD_TYPE_U32, value);
+}
+
+int odf_get_u64_from_table(const struct ubios_od_table_info *table,
+        u16 row, char *name, u64 *value)
+{
+    return odf_get_data_from_table(table, row, name, UBIOS_OD_TYPE_U64, value);
+}
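+
+/*
+ * Illustrative use of the typed wrappers above (names assumed, mirroring
+ * the "wd" table with a u64 "address" column used later in this series):
+ *
+ *   struct ubios_od_table_info wd;
+ *   u64 addr;
+ *
+ *   if (!odf_get_table_from_struct(vs, "wd", &wd) &&
+ *       !odf_get_u64_from_table(&wd, 0, "address", &addr))
+ *       pr_info("row 0 address = %#llx\n", addr);
+ */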
+
+int odf_get_vs_from_table(u8 *table, char *path, struct ubios_od_value_struct *vs)
+{
+    if (!table || !vs || !path)
+        return -EINVAL;
+
+    return odf_get_vs_from_file(table, path, vs);
+}
+
+int odf_get_list_from_table(u8 *table, char *path, struct ubios_od_list_info *list)
+{
+    int status;
+    struct ubios_od_value_struct vs;
+
+    if (!table || !list)
+        return -EINVAL;
+
+    status = odf_get_vs_from_table(table, path, &vs);
+    if (status)
+        return status;
+
+    if ((vs.type & UBIOS_OD_TYPE_LIST) != UBIOS_OD_TYPE_LIST) {
+        pr_err(ERR_PRE "odf: the type[%#x] is not a list\n", vs.type);
+        return -EFAULT;
+    }
+
+    odf_vs_to_list(&vs, list);
+
+    return 0;
+}
+
+/**
+@brief Get a ubios od value struct from the od root according to the path
+@param[in] root root pointer of od
+@param[in] path full path to search, optionally including a list index.
+@param[out] vs used to return a ubios od value struct.
+@return returned status of the call
+@retval = 0, get ubios od value struct success, saved in parameter vs.
+@retval < 0, get ubios od value struct failed.
+*/
+int odf_get_struct(struct ubios_od_root *root, char *path, struct ubios_od_value_struct *vs)
+{
+    int status;
+
+    if (!is_root_and_path_valid(root, path))
+        return -EINVAL;
+
+    status = odf_get_vs_from_root(root, path, NULL, vs);
+
+    return status;
+}
+
+/**
+@brief Get a list from the od root, will return a list info structure.
+@param[in] root root pointer of od
+@param[in] path full path to search, optionally including a list index.
+@param[out] list used to return a list info structure.
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_get_list(struct ubios_od_root *root, char *path, struct ubios_od_list_info *list)
+{
+    int status;
+    struct ubios_od_value_struct vs;
+
+    if (!is_root_and_path_valid(root, path) || !list)
+        return -EINVAL;
+
+    status = odf_get_vs_from_root(root, path, NULL, &vs);
+    if (status)
+        return status;
+
+    if ((vs.type & UBIOS_OD_TYPE_LIST) != UBIOS_OD_TYPE_LIST) {
+        pr_err(ERR_PRE "the type[%#x] is not a list\n", vs.type);
+        return -EFAULT;
+    }
+
+    odf_vs_to_list(&vs, list);
+
+    return 0;
+}
+
+int odf_get_u32_from_list(const struct ubios_od_list_info *list, u16 index, u32 *value)
+{
+    if (!value)
+        return -EINVAL;
+
+    if (list->data_type != UBIOS_OD_TYPE_U32)
+        return -EFAULT;
+
+    *value = odf_read32(list->start + sizeof(u32) * index);
+
+    return 0;
+}
+
+/**
+@brief Get a value structure from a list by index.
+@param[in] list list obtained from odf_get_list()
+@param[in] index index in the list to get.
+@param[out] vs used to return a value structure
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+@note:
+    This function is mainly useful when the data type in the list is struct:
+    get the value structure, then use odf_get_vs_by_name() to search inside.
+*/ +int odf_get_data_from_list(const struct ubios_od_list_info *list, + u16 index, struct ubios_od_value_struct *vs) +{ + u64 i; + u32 len; + u8 *p; + + if (!list || !vs) + return -EINVAL; + + if (index >= list->count) + return -EOVERFLOW; + + vs->name = list->name; + vs->type = list->data_type; + p = list->start; + switch (vs->type) { + case UBIOS_OD_TYPE_U8: + case UBIOS_OD_TYPE_S8: + case UBIOS_OD_TYPE_BOOL: + case UBIOS_OD_TYPE_CHAR: + vs->data = list->start + index * sizeof(u8); + vs->data_length = sizeof(u8); + break; + case UBIOS_OD_TYPE_U16: + case UBIOS_OD_TYPE_S16: + vs->data = list->start + index * sizeof(u16); + vs->data_length = sizeof(u16); + break; + case UBIOS_OD_TYPE_U32: + case UBIOS_OD_TYPE_S32: + vs->data = list->start + index * sizeof(u32); + vs->data_length = sizeof(u32); + break; + case UBIOS_OD_TYPE_U64: + case UBIOS_OD_TYPE_S64: + vs->data = list->start + index * sizeof(u64); + vs->data_length = sizeof(u64); + break; + case UBIOS_OD_TYPE_STRING: + for (i = 0; i < index; i++) + p += (strlen((char *)p) + 1); + vs->data = p; + vs->data_length = (u32)strlen((char *)p) + 1; + break; + case UBIOS_OD_TYPE_STRUCT: + for (i = 0; i < index; i++) { + len = odf_read32(p); + p += (sizeof(u32) + len); + } + vs->data = p + sizeof(u32); + vs->data_length = odf_read32(p); + break; + default: + pr_err(ERR_PRE "odf: invalid type[%#x], not support\n", vs->type); + return -EOPNOTSUPP; + } + + return 0; +} + +/** +@brief Get next value of a list. +@note: + The caller should ensure the input structure is a member of list, + this function can only check some of this. +*/ +int odf_next_in_list(const struct ubios_od_list_info *list, struct ubios_od_value_struct *vs) +{ + u8 *p; + + if (!vs) + return -EINVAL; + + if (list->data_type != vs->type || strcmp(list->name, vs->name)) + return -EFAULT; + + switch (vs->type) { + case UBIOS_OD_TYPE_U8: + case UBIOS_OD_TYPE_S8: + case UBIOS_OD_TYPE_BOOL: + case UBIOS_OD_TYPE_CHAR: + case UBIOS_OD_TYPE_U16: + case UBIOS_OD_TYPE_S16: + case UBIOS_OD_TYPE_U32: + case UBIOS_OD_TYPE_S32: + case UBIOS_OD_TYPE_U64: + case UBIOS_OD_TYPE_S64: + vs->data = vs->data + vs->data_length; + break; + case UBIOS_OD_TYPE_STRING: + vs->data = vs->data + vs->data_length; + vs->data_length = (u32)strlen((char *)vs->data) + 1; + break; + case UBIOS_OD_TYPE_STRUCT: + p = vs->data + vs->data_length; + vs->data_length = odf_read32(p); + vs->data = p + sizeof(u32); + break; + default: + pr_err(ERR_PRE "odf: invalid type[%#x], not support\n", vs->type); + return -EOPNOTSUPP; + } + if (vs->data >= list->end) + return -EOVERFLOW; + + return 0; +} + +/** +Internal function, get data pointer by path and type. 
+*/ +static int odf_get_data_and_check_type(const struct ubios_od_value_struct *vs, + char *name, u8 type, void **data) +{ + int status; + struct ubios_od_value_struct temp_vs; + + if (!vs || !name || !data) + return -EINVAL; + + status = odf_get_vs_by_name(vs->data, vs->data + vs->data_length, name, &temp_vs); + if (status) + return status; + + if (temp_vs.type != type) + return -EFAULT; + + *data = temp_vs.data; + + return 0; +} + +int odf_get_u8_from_struct(const struct ubios_od_value_struct *vs, char *name, u8 *value) +{ + int status; + u8 *data; + + if (!value) + return -EINVAL; + + status = odf_get_data_and_check_type(vs, name, UBIOS_OD_TYPE_U8, (void **)&data); + if (status) + return status; + + *value = odf_read8(data); + + return 0; +} + +int odf_get_u16_from_struct(const struct ubios_od_value_struct *vs, char *name, u16 *value) +{ + int status; + u8 *data; + + if (!value) + return -EINVAL; + + status = odf_get_data_and_check_type(vs, name, UBIOS_OD_TYPE_U16, (void **)&data); + if (status) + return status; + + *value = odf_read16(data); + + return 0; +} + +int odf_get_u32_from_struct(const struct ubios_od_value_struct *vs, char *name, u32 *value) +{ + int status; + u8 *data; + + if (!value) + return -EINVAL; + + status = odf_get_data_and_check_type(vs, name, UBIOS_OD_TYPE_U32, (void **)&data); + if (status) + return status; + + *value = odf_read32(data); + + return 0; +} + +int odf_get_bool_from_struct(const struct ubios_od_value_struct *vs, char *name, bool *value) +{ + int status; + u8 *data; + + if (!value) + return -EINVAL; + + status = odf_get_data_and_check_type(vs, name, UBIOS_OD_TYPE_BOOL, (void **)&data); + if (status) + return status; + + *value = odf_read8(data); + + return 0; +} + +/** +Get table in the value structure. +*/ +int odf_get_table_from_struct(const struct ubios_od_value_struct *vs, + char *name, struct ubios_od_table_info *table) +{ + int status; + struct ubios_od_value_struct temp_vs; + + if (!vs || !name || !table) + return -EINVAL; + + status = odf_get_vs_by_name(vs->data, vs->data + vs->data_length, name, &temp_vs); + if (status) + return status; + + if (temp_vs.type != UBIOS_OD_TYPE_TABLE) + return -EFAULT; + + return odf_vs_to_table(&temp_vs, table); +} + +int odf_get_list_from_struct(const struct ubios_od_value_struct *vs, + char *name, struct ubios_od_list_info *list) +{ + int status; + struct ubios_od_value_struct temp_vs; + + if (!vs || !name || !list) + return -EINVAL; + + status = odf_get_vs_by_name(vs->data, vs->data + vs->data_length, name, &temp_vs); + if (status) + return status; + + if ((temp_vs.type & UBIOS_OD_TYPE_LIST) != UBIOS_OD_TYPE_LIST) { + pr_err(ERR_PRE "the type[%#x] is not a list\n", temp_vs.type); + return -EFAULT; + } + + odf_vs_to_list(&temp_vs, list); + + return 0; +} diff --git a/drivers/ubios_uvb/odf/odf_file.c b/drivers/ubios_uvb/odf/odf_file.c new file mode 100644 index 000000000000..a37d65b05242 --- /dev/null +++ b/drivers/ubios_uvb/odf/odf_file.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
+ * Description: ODF file validation and retrieval functions
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include "include/libodf.h"
+
+bool is_od_root_valid(struct ubios_od_root *root)
+{
+    if (!root) {
+        pr_err(ERR_PRE "odf: root is NULL\n");
+        return false;
+    }
+
+    if (!odf_is_checksum_ok(&(root->header))) {
+        pr_err(ERR_PRE "odf: root checksum error.\n");
+        return false;
+    }
+
+    if (strcmp(root->header.name, UBIOS_OD_ROOT_NAME)) {
+        pr_err(ERR_PRE "odf: root name[%s] mismatch\n", root->header.name);
+        return false;
+    }
+
+    return true;
+}
+
+bool is_od_file_valid(u8 *file)
+{
+    struct ubios_od_header *header = (struct ubios_od_header *)file;
+
+    if (!header) {
+        pr_err(ERR_PRE "odf: file is NULL\n");
+        return false;
+    }
+
+    if (!odf_is_checksum_ok(header)) {
+        pr_err(ERR_PRE "odf: file checksum error.\n");
+        return false;
+    }
+
+    return true;
+}
+
+/**
+@brief Search all pointers in the od root, return the od file matching the input name.
+@param[in] root start of od root
+@param[in] name name of od
+@return
+@retval = NULL, not found.
+@retval != NULL, found.
+*/
+u8 *odf_get_od_file(struct ubios_od_root *root, char *name)
+{
+    u64 i;
+
+    if (!is_od_root_valid(root))
+        return NULL;
+
+    if (!name)
+        return NULL;
+
+    for (i = 0; i < root->count; i++) {
+        if (root->odfs[i] == UBIOS_OD_EMPTY)
+            continue;
+
+        /* an od file starts with its header, whose first field is the name */
+        if (strcmp(name, (char *)(u64)root->odfs[i]) == 0)
+            return (u8 *)(u64)root->odfs[i];
+    }
+
+    return NULL;
+}
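odf_helper.c below implements the checksum these validators rely on: the
checksum field is chosen so that summing the whole image as 32-bit words
yields zero. A hedged restatement of that invariant as a standalone check
(image_checksum_ok() is illustrative, not part of the patch):

    #include <linux/types.h>

    /* Sum the image as little-endian u32 words (tail bytes padded with
     * zeros); a valid image sums to 0 modulo 2^32. */
    static bool image_checksum_ok(const u8 *p, u32 size)
    {
        u64 sum = 0;
        u32 i;

        for (i = 0; i < size; i++)
            sum += (u64)p[i] << (8 * (i % 4));
        return (u32)sum == 0;
    }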
diff --git a/drivers/ubios_uvb/odf/odf_helper.c b/drivers/ubios_uvb/odf/odf_helper.c
new file mode 100644
index 000000000000..02e9a6a1895f
--- /dev/null
+++ b/drivers/ubios_uvb/odf/odf_helper.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * Description: ODF helper functions, including data reading, checksum and path parsing
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include "include/libodf.h"
+
+#define UBIOS_OD_INDEX_STRING_MAX 7
+#define DECIMAL 10
+
+/* To ensure aligned access, read one byte at a time */
+static void odf_read(u8 *address, u8 *value, u64 size)
+{
+    u64 i;
+
+    for (i = 0; i < size; i++)
+        value[i] = address[i];
+}
+
+u8 odf_read8(u8 *address)
+{
+    return *address;
+}
+
+u16 odf_read16(u8 *address)
+{
+    u16 temp;
+
+    odf_read(address, (u8 *)&temp, sizeof(u16));
+    return temp;
+}
+
+u32 odf_read32(u8 *address)
+{
+    u32 temp;
+
+    odf_read(address, (u8 *)&temp, sizeof(u32));
+    return temp;
+}
+
+u64 odf_read64(u8 *address)
+{
+    u64 temp;
+
+    odf_read(address, (u8 *)&temp, sizeof(u64));
+    return temp;
+}
+
+u32 odf_checksum(u8 *data, u32 size)
+{
+    u64 sum = 0;
+    u32 temp = size % sizeof(u32);
+    u64 i;
+
+    for (i = 0; i < size - temp; i += sizeof(u32))
+        sum += odf_read32(data + i);
+
+    switch (temp) {
+    case 1:
+        sum += odf_read8(data + i);
+        break;
+    case 2:
+        sum += odf_read16(data + i);
+        break;
+    case 3:
+        sum += odf_read32(data + i) & 0x00FFFFFF;
+        break;
+    default:
+        break;
+    }
+
+    return (~((u32)sum) + 1);
+}
+
+/**
+Only calculate over the valid data region.
+*/
+bool odf_is_checksum_ok(struct ubios_od_header *header)
+{
+    return odf_checksum((u8 *)header, header->total_size) == 0;
+}
+
+void odf_update_checksum(struct ubios_od_header *header)
+{
+    header->checksum = 0;
+    header->checksum = odf_checksum((u8 *)header, header->total_size);
+}
+
+/**
+@brief Separate one name from the path.
+    Advances *path past this name; when the path is exhausted, *path is set to NULL.
+    Also returns an index if the name is followed by []; if index is NULL, it is ignored.
+@param[in] path a string to be separated
+@param[out] name a name separated from path
+@param[in] max_len max length of the name
+@param[out] index if no index is present, returns 0xFFFF (UBIOS_OD_INVALID_INDEX)
+@return returned status of the call
+@retval = 0, success.
+@retval < 0, failed.
+*/
+int odf_separate_name(char **path, char *name, u64 max_len, u16 *index)
+{
+    char *c;
+    u64 i;
+    u64 j;
+    int ret;
+    char index_string[UBIOS_OD_INDEX_STRING_MAX] = {'\0'};
+    bool is_index = false;
+
+    if (!path || !name)
+        return -EINVAL;
+
+    if (!*path)
+        return -EOPNOTSUPP;
+
+    c = *path;
+    pr_debug(LOG_PRE "odf separate name: path[%s]\n", *path);
+
+    /* if the first character is a separator, skip it */
+    if (*c == UBIOS_OD_PATH_SEPARATOR)
+        c++;
+
+    i = 0;
+    j = 0;
+    while ((i < max_len) && (j < UBIOS_OD_INDEX_STRING_MAX)) {
+        if (*c == UBIOS_OD_PATH_SEPARATOR || *c == '\0') {
+            name[i++] = '\0';
+            if (index) {
+                ret = kstrtou16(index_string, DECIMAL, index);
+                if (ret)
+                    *index = UBIOS_OD_INVALID_INDEX;
+            }
+            pr_debug(LOG_PRE "odf separate name: got name[%s]\n", name);
+            break;
+        } else if (*c == '[') {
+            is_index = true;
+        } else if (*c == ']') {
+            index_string[j++] = '\0';
+            is_index = false;
+        } else {
+            if (is_index)
+                index_string[j++] = *c;
+            else
+                name[i++] = *c;
+        }
+        c++;
+    }
+
+    if ((i > max_len) || (j >= UBIOS_OD_INDEX_STRING_MAX))
+        return -EOVERFLOW;
+
+    if (*c == '\0')
+        *path = NULL;
+    else
+        *path = c + 1;
+
+    return 0;
+}
+
+/**
+@brief Get a name/value structure by the data pointer
+@param[in] data start address of data.
+@param[out] vs used to return value structure.
+*/ +void odf_get_vs_by_pointer(u8 *data, struct ubios_od_value_struct *vs) +{ + u8 *type_pointer = NULL; + u8 sizeof_length = 0; + + vs->name = (char *)data; + type_pointer = (u8 *)vs->name + strlen(vs->name) + 1; + vs->type = odf_read8(type_pointer); + switch (vs->type) { + case UBIOS_OD_TYPE_U8: + case UBIOS_OD_TYPE_S8: + case UBIOS_OD_TYPE_BOOL: + case UBIOS_OD_TYPE_CHAR: + vs->data_length = sizeof(u8); + vs->data = type_pointer + sizeof(u8); + break; + case UBIOS_OD_TYPE_U16: + case UBIOS_OD_TYPE_S16: + vs->data_length = sizeof(u16); + vs->data = type_pointer + sizeof(u8); + break; + case UBIOS_OD_TYPE_U32: + case UBIOS_OD_TYPE_S32: + vs->data_length = sizeof(u32); + vs->data = type_pointer + sizeof(u8); + break; + case UBIOS_OD_TYPE_U64: + case UBIOS_OD_TYPE_S64: + vs->data_length = sizeof(u64); + vs->data = type_pointer + sizeof(u8); + break; + case UBIOS_OD_TYPE_STRING: + vs->data = type_pointer + sizeof(u8); + vs->data_length = (u32)strlen(vs->data) + 1; + break; + default: + sizeof_length = sizeof(u32); + vs->data_length = odf_read32(type_pointer + sizeof(u8)); + vs->data = type_pointer + sizeof(u8) + sizeof_length; + break; + } +} diff --git a/drivers/ubios_uvb/odf/odf_trans.c b/drivers/ubios_uvb/odf/odf_trans.c new file mode 100644 index 000000000000..7f35fcaa84b0 --- /dev/null +++ b/drivers/ubios_uvb/odf/odf_trans.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: odf trans file + * Author: zhangrui + * Create: 2025-04-18 + */ +#include +#include +#include +#include +#include +#include +#include +#include "include/odf_trans.h" + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ODF Api"); + +struct cis_info *g_cis_info; +EXPORT_SYMBOL(g_cis_info); + + +void free_cis_info(void) +{ + u32 i; + + if (!g_cis_info) + return; + + for (i = 0; i < (g_cis_info)->group_count; i++) { + if ((g_cis_info)->groups[i]) { + kfree((g_cis_info)->groups[i]); + (g_cis_info)->groups[i] = NULL; + } + } + kfree(g_cis_info); + g_cis_info = NULL; +} + +static struct cis_group *create_group_from_vs(struct ubios_od_value_struct *vs) +{ + struct ubios_od_list_info list; + struct cis_group *group; + int status; + int i; + + status = odf_get_list_from_struct(vs, ODF_NAME_CIS_CALL_ID, &list); + if (status) { + pr_err(ERR_PRE "create group: get [call id list] failed, err = %d\n", status); + return NULL; + } + group = kzalloc(sizeof(struct cis_group) + (sizeof(u32) * list.count), GFP_KERNEL); + if (!group) + return NULL; + + status = odf_get_u32_from_struct(vs, ODF_NAME_CIS_OWNER, &(group->owner_user_id)); + if (status) { + pr_err(ERR_PRE "create group: get [owner id] failed, err = %d\n", status); + goto fail; + } + + status = odf_get_u8_from_struct(vs, ODF_NAME_CIS_USAGE, &(group->usage)); + if (status) { + pr_err(ERR_PRE "create group: get [usage] failed, err = %d\n", status); + goto fail; + } + + status = odf_get_u8_from_struct(vs, ODF_NAME_CIS_INDEX, &(group->index)); + if (status) + pr_info(LOG_PRE "cis group not get [index], use default value\n"); + + status = odf_get_u32_from_struct(vs, ODF_NAME_CIS_FORWARDER_ID, &(group->forwarder_id)); + if (status) + pr_info(LOG_PRE "cis group not get forwarder, use default value\n"); + + group->cis_count = list.count; + for (i = 0; i < list.count; i++) { + status = odf_get_u32_from_list(&list, i, &(group->call_id[i])); + if (status) { + pr_err(ERR_PRE "create group: get each call id failed, err = %d\n", status); + goto fail; + } + } + + return group; + +fail: + 
kfree(group);
+
+    return NULL;
+}
+
+static int create_cis_info_from_odf(void)
+{
+    struct ubios_od_list_info list;
+    struct ubios_od_value_struct vs;
+    struct ubios_od_value_struct ub_vs;
+    struct ubios_ubrt_table *ubrt_table = NULL;
+    struct acpi_table_header *table = NULL;
+    u8 *sub_table = NULL;
+    struct ubios_od_header *header = NULL;
+    acpi_status status;
+    int i = 0;
+    int err = 0;
+    u32 sub_table_size = 0;
+    int ub_vs_err = 0;
+
+    status = acpi_get_table(ACPI_SIG_UBRT, 0, &table);
+    if (ACPI_SUCCESS(status)) {
+        ubrt_table = (struct ubios_ubrt_table *)table;
+
+        for (i = 0; i < ubrt_table->count; i++) {
+            if (ubrt_table->sub_tables[i].type == UBRT_CALL_ID_SERVICE) {
+                pr_info(LOG_PRE "find cis table in ubrt table\n");
+                header = memremap(ubrt_table->sub_tables[i].pointer,
+                    sizeof(struct ubios_od_header), MEMREMAP_WB);
+                if (!header) {
+                    pr_err(ERR_PRE "failed to map cis table to od header in ACPI\n");
+                    return -ENOMEM;
+                }
+                sub_table_size = header->total_size;
+                memunmap(header);
+                sub_table = (u8 *)memremap(ubrt_table->sub_tables[i].pointer,
+                    sub_table_size, MEMREMAP_WB);
+                break;
+            }
+        }
+
+        if (!sub_table) {
+            pr_err(ERR_PRE "failed to get cis table address in ACPI\n");
+            return -ENOMEM;
+        }
+        pr_info(LOG_PRE "get cis sub table success\n");
+
+        err = odf_get_list_from_table(sub_table, ODF_NAME_CIS_GROUP, &list);
+        if (err) {
+            pr_err(ERR_PRE "create cis info from odf failed, group not found, err = %d\n",
+                err);
+            goto fail;
+        }
+
+        ub_vs_err = odf_get_vs_from_table(sub_table, ODF_NAME_CIS_UB, &ub_vs);
+    } else {
+        err = odf_get_list(od_root,
+            ODF_FILE_NAME_CALL_ID_SERVICE "/" ODF_NAME_CIS_GROUP, &list);
+        if (err) {
+            pr_err(ERR_PRE "create cis info from odf failed, group not found, err = %d\n",
+                err);
+            return err;
+        }
+
+        ub_vs_err = odf_get_struct(od_root,
+            ODF_FILE_NAME_CALL_ID_SERVICE "/" ODF_NAME_CIS_UB, &ub_vs);
+    }
+
+    g_cis_info = kzalloc(sizeof(struct cis_info) + (sizeof(void *) * list.count), GFP_KERNEL);
+    if (!g_cis_info) {
+        err = -ENOMEM;
+        goto fail;
+    }
+    g_cis_info->group_count = list.count;
+
+    err = odf_get_data_from_list(&list, 0, &vs);
+    if (err) {
+        pr_err(ERR_PRE "create cis info from odf failed: get data from CIS group failed, err = %d\n",
+            err);
+        goto fail;
+    }
+    for (i = 0; i < list.count; i++) {
+        g_cis_info->groups[i] = create_group_from_vs(&vs);
+        if (!g_cis_info->groups[i]) {
+            pr_err(ERR_PRE "create cis group from odf failed\n");
+            err = -ENODATA;
+            goto fail;
+        }
+        (void)odf_next_in_list(&list, &vs);
+    }
+
+    if (!ub_vs_err) {
+        pr_info(LOG_PRE "found ub struct in cis info\n");
+        err = odf_get_u8_from_struct(&ub_vs, ODF_NAME_CIS_USAGE, &(g_cis_info->ub.usage));
+        if (err) {
+            pr_err(ERR_PRE "cis ub: get [usage] failed, err = %d\n", err);
+            goto fail;
+        }
+
+        err = odf_get_u8_from_struct(&ub_vs, ODF_NAME_CIS_INDEX, &(g_cis_info->ub.index));
+        if (err)
+            pr_warn(LOG_PRE "ub struct has no [index], using default value\n");
+
+        err = odf_get_u32_from_struct(&ub_vs, ODF_NAME_CIS_FORWARDER_ID,
+            &(g_cis_info->ub.forwarder_id));
+        if (err)
+            pr_warn(LOG_PRE "ub struct has no forwarder, using default value\n");
+    } else {
+        pr_warn(LOG_PRE "ub struct not found in cis info\n");
+    }
+
+    if (sub_table)
+        memunmap(sub_table);
+
+    pr_info(LOG_PRE "get cis table from odf success\n");
+
+    return 0;
+fail:
+    if (sub_table)
+        memunmap(sub_table);
+
+    free_cis_info();
+
+    return err;
+}
+
+struct uvb_info *g_uvb_info;
+EXPORT_SYMBOL(g_uvb_info);
+
+static void free_uvb_info(void)
+{
+    u16 i;
+
+    if (!g_uvb_info)
+        return;
+
+    for (i = 0; i <
(g_uvb_info)->uvb_count; i++) { + if ((g_uvb_info)->uvbs[i]) { + kfree((g_uvb_info)->uvbs[i]); + (g_uvb_info)->uvbs[i] = NULL; + } + } + if (g_uvb_info) { + kfree(g_uvb_info); + g_uvb_info = NULL; + } +} + +static struct uvb *create_uvb_from_vs(const struct ubios_od_value_struct *vs) +{ + struct uvb *temp_uvb; + struct ubios_od_table_info wd; + int status; + u16 row; + + status = odf_get_table_from_struct(vs, ODF_NAME_WD, &wd); + if (status) { + pr_err(ERR_PRE "create uvb info: get [wd] failed, [%d]\n", status); + return NULL; + } + temp_uvb = kzalloc(sizeof(struct uvb) + + sizeof(struct uvb_window_description) * wd.row, GFP_KERNEL); + if (!temp_uvb) + return NULL; + + if (wd.row > UVB_WINDOW_COUNT_MAX) { + pr_err(ERR_PRE "create uvb info: uvb window count[%d] error.\n", wd.row); + goto fail; + } + temp_uvb->window_count = (u8)wd.row; + (void)odf_get_bool_from_struct(vs, ODF_NAME_SECURE, &temp_uvb->secure); + (void)odf_get_u16_from_struct(vs, ODF_NAME_DELAY, &temp_uvb->delay); + for (row = 0; row < wd.row; row++) { + status = odf_get_u64_from_table(&wd, + row, ODF_NAME_OBTAIN, &(temp_uvb->wd[row].obtain)); + if (status) { + pr_err(ERR_PRE "create uvb info: get [obtain] failed, %d.\n", status); + goto fail; + } + status = odf_get_u64_from_table(&wd, + row, ODF_NAME_ADDRESS, &(temp_uvb->wd[row].address)); + if (status) { + pr_err(ERR_PRE "create uvb info: get [address] failed, %d.\n", status); + goto fail; + } + (void)odf_get_u64_from_table(&wd, + row, ODF_NAME_BUFFER, &(temp_uvb->wd[row].buffer)); + (void)odf_get_u32_from_table(&wd, row, ODF_NAME_SIZE, &(temp_uvb->wd[row].size)); + } + + return temp_uvb; +fail: + kfree(temp_uvb); + + return NULL; +} + +static int create_uvb_info_from_odf(void) +{ + struct ubios_od_list_info uvb_list; + struct ubios_od_value_struct vs; + struct ubios_ubrt_table *ubrt_table = NULL; + struct acpi_table_header *table = NULL; + u8 *sub_table = NULL; + struct ubios_od_header *header = NULL; + acpi_status status; + int i = 0; + int err = 0; + u32 sub_table_size = 0; + + status = acpi_get_table(ACPI_SIG_UBRT, 0, &table); + if (ACPI_SUCCESS(status)) { + ubrt_table = (struct ubios_ubrt_table *)table; + for (i = 0; i < ubrt_table->count; i++) { + if (ubrt_table->sub_tables[i].type == UBRT_VIRTUAL_BUS) { + pr_info(LOG_PRE "find uvb table in ubrt table\n"); + header = memremap(ubrt_table->sub_tables[i].pointer, + sizeof(struct ubios_od_header), MEMREMAP_WB); + if (!header) { + pr_err(ERR_PRE "failed to map uvb table to od header in ACPI\n"); + return -ENOMEM; + } + sub_table_size = header->total_size; + memunmap(header); + sub_table = (u8 *)memremap(ubrt_table->sub_tables[i].pointer, + sub_table_size, MEMREMAP_WB); + break; + } + } + + if (!sub_table) { + pr_err(ERR_PRE "failed to get uvb table address in ACPI\n"); + return -ENOMEM; + } + pr_info(LOG_PRE "get uvb sub table suceess\n"); + + err = odf_get_list_from_table(sub_table, ODF_NAME_UVB, &uvb_list); + if (err) { + pr_err(ERR_PRE "create uvb info: find uvb from od failed, err = %d\n", err); + goto exit; + } + } else { + err = odf_get_list(od_root, ODF_FILE_NAME_VIRTUAL_BUS "/" ODF_NAME_UVB, &uvb_list); + if (err) { + pr_err(ERR_PRE "create uvb info: find uvb from od failed, err = %d\n", err); + return err; + } + } + + g_uvb_info = kzalloc(sizeof(struct uvb_info) + sizeof(void *) * uvb_list.count, GFP_KERNEL); + if (!g_uvb_info) { + err = -ENOMEM; + goto exit; + } + if (uvb_list.count > UVB_WINDOW_COUNT_MAX) { + pr_err(ERR_PRE "create uvb info: uvb count[%d] error.\n", uvb_list.count); + err = -EOVERFLOW; + goto 
exit;
+    }
+    g_uvb_info->uvb_count = (u8)uvb_list.count;
+    err = odf_get_data_from_list(&uvb_list, 0, &vs);
+    if (err) {
+        pr_err(ERR_PRE "create uvb info: get uvb failed [%d]\n", err);
+        goto exit;
+    }
+    for (i = 0; i < uvb_list.count; i++) {
+        g_uvb_info->uvbs[i] = create_uvb_from_vs(&vs);
+        if (!g_uvb_info->uvbs[i]) {
+            pr_err(ERR_PRE "create uvb from odf failed\n");
+            err = -EINVAL;
+            goto exit;
+        }
+        (void)odf_next_in_list(&uvb_list, &vs);
+    }
+    if (sub_table)
+        memunmap(sub_table);
+
+    pr_info(LOG_PRE "get uvb table from odf success\n");
+
+    return 0;
+exit:
+    if (sub_table)
+        memunmap(sub_table);
+
+    free_uvb_info();
+
+    return err;
+}
+
+struct ubios_od_root *od_root;
+EXPORT_SYMBOL(od_root);
+
+static void free_odf_info(void)
+{
+    kfree(od_root);
+    od_root = NULL;
+}
+
+static int create_odf_info(void)
+{
+    u64 od_root_phys = 0; /* physical address */
+    struct ubios_od_root *od_root_origin = NULL; /* virtual address */
+    struct acpi_table_header *ubrt_header = NULL;
+    u32 od_root_size = 0;
+    int i = 0;
+    acpi_status status;
+    int ret = 0;
+    u16 count = 0;
+
+    status = acpi_get_table(ACPI_SIG_UBRT, 0, &ubrt_header);
+    if (ACPI_SUCCESS(status)) {
+        pr_info(LOG_PRE "Successfully got UBRT table\n");
+        return 0;
+    }
+
+    od_root_origin = (struct ubios_od_root *)
+        memremap(od_root_phys, sizeof(struct ubios_od_header), MEMREMAP_WB);
+    if (!od_root_origin) {
+        pr_err(ERR_PRE "od_root header memremap failed, od_root addr=%016llx\n", od_root_phys);
+        goto fail;
+    }
+    od_root_size = od_root_origin->header.total_size;
+    memunmap((void *)od_root_origin);
+
+    od_root_origin = (struct ubios_od_root *)memremap(od_root_phys, od_root_size, MEMREMAP_WB);
+    if (!od_root_origin) {
+        pr_err(ERR_PRE "od_root memremap failed, od_root addr=%016llx\n", od_root_phys);
+        goto fail;
+    }
+
+    count = od_root_origin->count;
+    od_root = kzalloc(sizeof(struct ubios_od_root) + count * sizeof(u64), GFP_KERNEL);
+    if (!od_root) {
+        pr_err(ERR_PRE "kzalloc od_root failed\n");
+        goto fail;
+    }
+    memcpy(&od_root->header, &od_root_origin->header, sizeof(struct ubios_od_header));
+    od_root->count = od_root_origin->count;
+
+    for (i = 0; i < od_root->count; i++) {
+        if (od_root_origin->odfs[i] == UBIOS_OD_EMPTY)
+            continue;
+
+        od_root->odfs[i] = od_root_origin->odfs[i];
+    }
+    if (od_root_origin)
+        memunmap(od_root_origin);
+
+    odf_update_checksum(&od_root->header);
+    pr_info(LOG_PRE "get ubios table success\n");
+
+    return 0;
+
+fail:
+    free_odf_info();
+    if (od_root_origin)
+        memunmap(od_root_origin);
+
+    return -1;
+}
+
+static int __init odf_init(void)
+{
+    int status;
+
+    pr_info(LOG_PRE "starting odf init\n");
+    status = create_odf_info();
+    if (status) {
+        pr_err(ERR_PRE "odf table init failed\n");
+        return -1;
+    }
+
+    status = create_cis_info_from_odf();
+    if (status) {
+        pr_err(ERR_PRE "create cis info failed, cis is invalid\n");
+        return -1;
+    }
+
+    status = create_uvb_info_from_odf();
+    if (status) {
+        pr_err(ERR_PRE "create uvb info failed, uvb is invalid\n");
+        return -1;
+    }
+
+    pr_info(LOG_PRE "odf init success\n");
+
+    return 0;
+}
+
+static void __exit odf_exit(void)
+{
+    free_uvb_info();
+    free_cis_info();
+    free_odf_info();
+
+    pr_info(LOG_PRE "odf exit success\n");
+}
+
+module_init(odf_init);
+module_exit(odf_exit);
--
Gitee

From 61aa68260094f425e2e40db4de792c14cfa48f5b Mon Sep 17 00:00:00 2001
From: klmengkd
Date: Tue, 4 Nov 2025 21:04:24 +0800
Subject: [PATCH 02/48] ubios_uvb: support parsing ubios information via dtb

commit 9c3097dcbec30da54bc5082cb3236b300770993d openEuler

This patch supports parsing the UBIOS object description specification
via dtb.
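For illustration, the helper added below would be used like this (a hedged
sketch; the "linux,ubiostbl" property name is taken from the odf_trans.c
hunk, the surrounding code is assumed):

    u64 od_root_phys;
    int ret;

    /* Read the od root physical address published by firmware in
     * /chosen { linux,ubiostbl = <...>; }; the cell count may be 1 or 2,
     * which is why the helper checks the property length. */
    ret = odf_get_fdt_ubiostbl(&od_root_phys, "linux,ubiostbl");
    if (!ret)
        pr_info("ubios od root at phys 0x%llx\n", od_root_phys);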
Signed-off-by: Anonymous_Z
Signed-off-by: klmengkd
---
 drivers/ubios_uvb/include/odf_interface.h |  2 ++
 drivers/ubios_uvb/odf/odf_get_fdt.c       | 34 +++++++++++++++++++++
 drivers/ubios_uvb/odf/odf_trans.c         |  5 ++++
 3 files changed, 41 insertions(+)
 create mode 100644 drivers/ubios_uvb/odf/odf_get_fdt.c

diff --git a/drivers/ubios_uvb/include/odf_interface.h b/drivers/ubios_uvb/include/odf_interface.h
index 2ca2f591b04e..41b1fa4450d2 100644
--- a/drivers/ubios_uvb/include/odf_interface.h
+++ b/drivers/ubios_uvb/include/odf_interface.h
@@ -91,4 +91,6 @@ struct ubios_ubrt_table {
 	struct ubrt_sub_tables sub_tables[];
 };
 
+int odf_get_fdt_ubiostbl(u64 *phys_addr, char *tbl);
+
 #endif

diff --git a/drivers/ubios_uvb/odf/odf_get_fdt.c b/drivers/ubios_uvb/odf/odf_get_fdt.c
new file mode 100644
index 000000000000..b683955e657c
--- /dev/null
+++ b/drivers/ubios_uvb/odf/odf_get_fdt.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * Description: ODF get fdt info
+ * Author: mengkanglai
+ * Create: 2025-04-18
+ */
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+#include <linux/of_fdt.h>
+#include <linux/export.h>
+#include <asm/unaligned.h>
+
+int odf_get_fdt_ubiostbl(u64 *phys_addr, char *tbl)
+{
+    int node, len;
+    const void *prop;
+
+    node = fdt_path_offset(initial_boot_params, "/chosen");
+    if (node < 0) {
+        pr_err("failed to get device tree chosen node\n");
+        return -EINVAL;
+    }
+    prop = fdt_getprop(initial_boot_params, node, tbl, &len);
+    if (!prop) {
+        pr_err("failed to get property\n");
+        return -EINVAL;
+    }
+    *phys_addr = (len == 4) ? (u64)be32_to_cpup((const u32 *)prop) :
+        get_unaligned_be64(prop);
+
+    return 0;
+}
+EXPORT_SYMBOL(odf_get_fdt_ubiostbl);

diff --git a/drivers/ubios_uvb/odf/odf_trans.c b/drivers/ubios_uvb/odf/odf_trans.c
index 7f35fcaa84b0..dd5af5dd88bb 100644
--- a/drivers/ubios_uvb/odf/odf_trans.c
+++ b/drivers/ubios_uvb/odf/odf_trans.c
@@ -399,6 +399,11 @@ static int create_odf_info(void)
 		pr_info(LOG_PRE "Successfully got UBRT table\n");
 		return 0;
 	}
+	ret = odf_get_fdt_ubiostbl(&od_root_phys, "linux,ubiostbl");
+	if (ret) {
+		pr_err(ERR_PRE "from fdt get ubiostbl failed\n");
+		goto fail;
+	}
 
 	od_root_origin = (struct ubios_od_root *)
 		memremap(od_root_phys, sizeof(struct ubios_od_header), MEMREMAP_WB);
--
Gitee

From 0141f531a29d58e4ea0c96e13f01b2b3a631e22d Mon Sep 17 00:00:00 2001
From: klmengkd
Date: Tue, 4 Nov 2025 21:24:07 +0800
Subject: [PATCH 03/48] ubios_uvb: support CIS framework send function

commit 11397dec7b1306e514398618105b2d4cdd18bb48 openEuler

This patch supports the CIS framework send function, including callback
function registration and unregistration, and the cis send interface.
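A hedged usage sketch of the send path (the cis_message fields follow the
way cis_call_remote() consumes them below; the ids, buffers and sizes are
placeholders):

    u8 in_buf[16] = { 0 };
    u8 out_buf[64];
    u32 out_size = sizeof(out_buf);
    struct cis_message msg = { 0 };
    int ret;

    msg.input = in_buf;
    msg.input_size = sizeof(in_buf);
    msg.output = out_buf;
    msg.p_output_size = &out_size;

    /* synchronous call to a remote receiver identified by receiver_id */
    ret = cis_call_by_uvb(call_id, sender_id, receiver_id, &msg, true);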
Signed-off-by: Anonymous_Z Signed-off-by: klmengkd --- drivers/ubios_uvb/cis/Makefile | 9 + drivers/ubios_uvb/cis/cis_core.c | 176 +++++++ drivers/ubios_uvb/cis/cis_info_process.c | 240 ++++++++++ drivers/ubios_uvb/cis/cis_info_process.h | 60 +++ drivers/ubios_uvb/cis/io_param.c | 29 ++ drivers/ubios_uvb/cis/io_param.h | 17 + drivers/ubios_uvb/cis/uvb_info_process.c | 585 +++++++++++++++++++++++ drivers/ubios_uvb/cis/uvb_info_process.h | 49 ++ include/ubios/cis.h | 53 ++ 9 files changed, 1218 insertions(+) create mode 100644 drivers/ubios_uvb/cis/Makefile create mode 100644 drivers/ubios_uvb/cis/cis_core.c create mode 100644 drivers/ubios_uvb/cis/cis_info_process.c create mode 100644 drivers/ubios_uvb/cis/cis_info_process.h create mode 100644 drivers/ubios_uvb/cis/io_param.c create mode 100644 drivers/ubios_uvb/cis/io_param.h create mode 100644 drivers/ubios_uvb/cis/uvb_info_process.c create mode 100644 drivers/ubios_uvb/cis/uvb_info_process.h create mode 100644 include/ubios/cis.h diff --git a/drivers/ubios_uvb/cis/Makefile b/drivers/ubios_uvb/cis/Makefile new file mode 100644 index 000000000000..b41b0c361b47 --- /dev/null +++ b/drivers/ubios_uvb/cis/Makefile @@ -0,0 +1,9 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. +# Create : 2025-04-18 +# Description : cis Makefile + +obj-$(CONFIG_UDFI_CIS) += cis.o + +cis-objs := cis_info_process.o uvb_info_process.o cis_core.o io_param.o + +ccflags-y += -I$(srctree)/$(src)/../include diff --git a/drivers/ubios_uvb/cis/cis_core.c b/drivers/ubios_uvb/cis/cis_core.c new file mode 100644 index 000000000000..87df2053af21 --- /dev/null +++ b/drivers/ubios_uvb/cis/cis_core.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: Call ID Service (CIS) core module, manages inter-process communication + * via call identifiers with local/remote handling and UVB integration. 
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <ubios/cis.h>
+
+#include "cis_info_process.h"
+#include "uvb_info_process.h"
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Call ID Service Framework");
+
+static bool cis_call_for_me(u32 receiver_id)
+{
+	if ((receiver_id == UBIOS_USER_ID_ALL) ||
+	    (receiver_id == ubios_get_user_type(UBIOS_MY_USER_ID)) ||
+	    (receiver_id == UBIOS_MY_USER_ID)) {
+		return true;
+	}
+
+	return false;
+}
+
+static bool cis_call_for_local(u32 receiver_id)
+{
+	if ((ubios_get_user_type(receiver_id) == UBIOS_USER_ID_INTERGRATED_UB_DEVICE) ||
+	    (ubios_get_user_type(receiver_id) == UBIOS_USER_ID_INTERGRATED_PCIE_DEVICE)) {
+		return true;
+	}
+
+	return false;
+}
+
+int cis_call_remote(u32 call_id, u32 sender_id, u32 receiver_id,
+		    struct cis_message *msg,
+		    bool is_sync)
+{
+	u32 forwarder_id;
+	u32 exact_receiver_id;
+	u8 usage;
+	u8 index;
+	int res;
+	struct udfi_para para = { 0 };
+
+	pr_debug(LOG_PRE "cis remote call: call id %08x, sender id %08x, receiver id %08x\n",
+		 call_id, sender_id, receiver_id);
+	res = get_cis_group_info(call_id, receiver_id,
+				 &usage, &index, &exact_receiver_id, &forwarder_id);
+	if (res) {
+		pr_err(ERR_PRE "can't get group info, call id=%08x, receiver id=%08x\n",
+		       call_id, receiver_id);
+		return -EOPNOTSUPP;
+	}
+
+	para.input = msg->input;
+	para.input_size = msg->input_size;
+	para.output = msg->output;
+	para.output_size = msg->p_output_size;
+	para.message_id = call_id;
+	para.receiver_id = exact_receiver_id;
+	para.sender_id = sender_id;
+	para.forwarder_id = forwarder_id;
+
+	if (usage != CIS_USAGE_UVB) {
+		pr_err(ERR_PRE "method not supported, call id=%08x, receiver id=%08x, usage=%d\n",
+		       call_id, receiver_id, usage);
+		return -EOPNOTSUPP;
+	}
+
+	if (is_sync)
+		return cis_call_uvb_sync(index, &para);
+
+	return cis_call_uvb(index, &para);
+}
+
+/**
+ * cis_call_by_uvb - Trigger a cis call with the given arguments.
+ *
+ * @call_id: call id that identifies which cis call will be triggered.
+ * @sender_id: user id of sender.
+ * @receiver_id: user id of receiver.
+ * @msg: the data that the user needs to transmit.
+ * @is_sync: whether to use a synchronous interface.
+ *
+ * Search for cia (call id attribute) in cis info with given call id and receiver id.
+ * The `usage` property of cia determines which method to use (uvb/arch call).
+ * Return 0 if the cis call succeeds; return -EOPNOTSUPP if the communication
+ * method is not supported, else return the cis error code.
+ */
+int cis_call_by_uvb(u32 call_id, u32 sender_id, u32 receiver_id,
+		    struct cis_message *msg, bool is_sync)
+{
+	int ret;
+	msg_handler func;
+
+	pr_info(LOG_PRE "cis call: call id %08x, sender id %08x, receiver id %08x\n",
+		call_id, sender_id, receiver_id);
+	if (cis_call_for_me(receiver_id) || cis_call_for_local(receiver_id)) {
+		func = search_local_cis_func(call_id, receiver_id);
+		if (func) {
+			ret = func(msg);
+			if (ret) {
+				pr_err(ERR_PRE "cis call execute registered cis func failed\n");
+				return ret;
+			}
+			pr_info(LOG_PRE "cis call execute registered cis func success\n");
+			return 0;
+		}
+		pr_err(ERR_PRE "can't find cis func for callid=%08x, receiver_id=%08x\n",
+		       call_id, receiver_id);
+		return -EOPNOTSUPP;
+	}
+
+	return cis_call_remote(call_id, sender_id, receiver_id, msg, is_sync);
+}
+EXPORT_SYMBOL(cis_call_by_uvb);
+
+int cis_module_lock_func(int lock)
+{
+	if (lock)
+		return try_module_get(THIS_MODULE) ?
0 : -EINVAL; + + module_put(THIS_MODULE); + + return 0; +} +EXPORT_SYMBOL(cis_module_lock_func); + +static int __init cis_init(void) +{ + int err = 0; + + err = init_cis_table(); + if (err) { + pr_err(ERR_PRE "cis info init failed, err=%d\n", err); + goto fail; + } + + err = init_global_vars(); + if (err) { + pr_err(ERR_PRE "global vars malloc failed, err=%d\n", err); + goto free_global; + } + + err = init_uvb(); + if (err) { + pr_err(ERR_PRE "uvb init failed, err=%d\n", err); + goto fail; + } + + pr_info(LOG_PRE "cis init success\n"); + return 0; +fail: + uninit_uvb(); +free_global: + free_global_vars(); + + return err; +} + +static void __exit cis_exit(void) +{ + uninit_uvb(); + free_global_vars(); + pr_info(LOG_PRE "cis exit success\n"); +} + +module_init(cis_init); +module_exit(cis_exit); diff --git a/drivers/ubios_uvb/cis/cis_info_process.c b/drivers/ubios_uvb/cis/cis_info_process.c new file mode 100644 index 000000000000..285d40fcdae6 --- /dev/null +++ b/drivers/ubios_uvb/cis/cis_info_process.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: Call ID Service (CIS) info processing module, handles CIS init, + * func register/lookup and group info retrieval. + * Author: zhangrui + * Create: 2025-04-18 + */ + +#include +#include +#include +#include +#include +#include "cis_info_process.h" + +LIST_HEAD(g_local_cis_list); +DEFINE_SPINLOCK(cis_register_lock); +struct cis_message *io_param_sync; + +int init_cis_table(void) +{ + if (!g_cis_info) { + pr_err(ERR_PRE "failed to get cis info from odf\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +int init_global_vars(void) +{ + io_param_sync = kzalloc(sizeof(struct cis_message), GFP_KERNEL); + if (!io_param_sync) + return -ENOMEM; + + return 0; +} + +void free_global_vars(void) +{ + kfree(io_param_sync); + io_param_sync = NULL; +} + +static bool is_call_id_supported(struct cis_group *group, u32 call_id) +{ + u32 i; + + for (i = 0; i < group->cis_count; i++) { + pr_debug(LOG_PRE "cia call_id: %08x\n", group->call_id[i]); + if (group->call_id[i] == call_id) + return true; + } + + return false; +} + +int get_cis_group_info(u32 call_id, u32 receiver_id, + u8 *usage, u8 *index, + u32 *exact_receiver_id, u32 *forwarder_id) +{ + u32 i; + + if (!g_cis_info) { + pr_err(ERR_PRE "can't get cis_info from odf\n"); + return -EOPNOTSUPP; + } + + for (i = 0; i < g_cis_info->group_count; i++) { + if (receiver_id != g_cis_info->groups[i]->owner_user_id && + receiver_id != ubios_get_user_type(g_cis_info->groups[i]->owner_user_id)) + continue; + if (is_call_id_supported(g_cis_info->groups[i], call_id)) { + *usage = g_cis_info->groups[i]->usage; + *index = g_cis_info->groups[i]->index; + *exact_receiver_id = g_cis_info->groups[i]->owner_user_id; + *forwarder_id = g_cis_info->groups[i]->forwarder_id; + return 0; + } + } + + if (ubios_get_user_type(receiver_id) == UBIOS_USER_ID_UB_DEVICE) { + *usage = g_cis_info->ub.usage; + *index = g_cis_info->ub.index; + *exact_receiver_id = receiver_id; + *forwarder_id = g_cis_info->ub.forwarder_id; + pr_info(LOG_PRE "refresh info, usage=%d, index=%d, forward_id=%08x\n", + *usage, *index, *forwarder_id); + return 0; + } + + pr_err(ERR_PRE "call id: %08x not supported\n", call_id); + + return -EOPNOTSUPP; +} + +/* +Search Call ID Service owned by this component, return the function. 
+*/
+struct cis_func_node *search_local_cis_func_node(u32 call_id, u32 receiver_id)
+{
+	struct cis_func_node *cis_node = NULL;
+	struct cis_func_node *tmp;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tmp, &g_local_cis_list, link) {
+		if ((tmp->call_id == call_id) && (tmp->receiver_id == receiver_id)) {
+			cis_node = tmp;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return cis_node;
+}
+
+/*
+Search the local Call ID Service function according to Call ID, return the function.
+*/
+msg_handler search_local_cis_func(u32 call_id, u32 receiver_id)
+{
+	struct cis_func_node *cis_node;
+
+	cis_node = search_local_cis_func_node(call_id, receiver_id);
+	if (cis_node)
+		return cis_node->func;
+
+	return NULL;
+}
+
+/*
+Search Call ID Service owned by this component, return the function.
+*/
+msg_handler search_my_cis_func(u32 call_id)
+{
+	return search_local_cis_func(call_id, UBIOS_MY_USER_ID);
+}
+
+/*
+Register a Call ID Service
+@call_id - UBIOS Interface ID
+@receiver_id - UBIOS User ID who owns this CIS
+@func - Callback function of Call ID
+*/
+int register_local_cis_func(u32 call_id, u32 receiver_id, msg_handler func)
+{
+	struct cis_func_node *p;
+	unsigned long flags;
+
+	pr_info(LOG_PRE "cis register: call_id[%08x], receiver_id[%08x]\n", call_id, receiver_id);
+	if (UBIOS_GET_MESSAGE_FLAG(call_id) != UBIOS_CALL_ID_FLAG) {
+		pr_err(ERR_PRE "register is not a uvb call\n");
+		return -EINVAL;
+	}
+	if (!func) {
+		pr_err(ERR_PRE "register func is NULL\n");
+		return -EINVAL;
+	}
+
+	/* check whether this Call ID already has a function */
+	if (search_local_cis_func_node(call_id, receiver_id)) {
+		pr_err(ERR_PRE "cis register: call_id[%08x], receiver_id[%08x], already registered func\n",
+		       call_id, receiver_id);
+		return -EINVAL;
+	}
+
+	p = kcalloc(1, sizeof(struct cis_func_node), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	p->call_id = call_id;
+	p->receiver_id = receiver_id;
+	p->func = func;
+
+	spin_lock_irqsave(&cis_register_lock, flags);
+	list_add_tail_rcu(&p->link, &g_local_cis_list);
+	spin_unlock_irqrestore(&cis_register_lock, flags);
+	pr_info(LOG_PRE "register cis func success\n");
+
+	return 0;
+}
+EXPORT_SYMBOL(register_local_cis_func);
+
+/*
+Register a Call ID Service owned by this component
+@call_id - UBIOS Interface ID
+@func - Callback function of Call ID
+*/
+int register_my_cis_func(u32 call_id, msg_handler func)
+{
+	return register_local_cis_func(call_id, UBIOS_MY_USER_ID, func);
+}
+EXPORT_SYMBOL(register_my_cis_func);
+
+/*
+Unregister a Call ID Service
+@call_id - UBIOS Interface ID
+@receiver_id - UBIOS User ID who owns this CIS
+*/
+int unregister_local_cis_func(u32 call_id, u32 receiver_id)
+{
+	struct cis_func_node *p;
+	unsigned long flags;
+
+	pr_info(LOG_PRE "cis unregister: call_id[%08x], receiver_id[%08x]\n", call_id, receiver_id);
+	if (UBIOS_GET_MESSAGE_FLAG(call_id) != UBIOS_CALL_ID_FLAG) {
+		pr_err(ERR_PRE "unregister is not a uvb call\n");
+		return -EINVAL;
+	}
+
+	p = search_local_cis_func_node(call_id, receiver_id);
+	if (!p) {
+		pr_err(ERR_PRE "cis unregister: call_id[%08x], receiver_id[%08x] func node not found.\n",
+		       call_id, receiver_id);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&cis_register_lock, flags);
+	list_del_rcu(&p->link);
+	spin_unlock_irqrestore(&cis_register_lock, flags);
+	synchronize_rcu();
+
+	kfree(p);
+	pr_info(LOG_PRE "unregister cis func success\n");
+
+	return 0;
+}
+EXPORT_SYMBOL(unregister_local_cis_func);
+
+/*
+Unregister a Call ID Service owned by this component
+@call_id - UBIOS Interface ID
+*/
+int unregister_my_cis_func(u32 call_id)
+{
+
return unregister_local_cis_func(call_id, UBIOS_MY_USER_ID); +} +EXPORT_SYMBOL(unregister_my_cis_func); diff --git a/drivers/ubios_uvb/cis/cis_info_process.h b/drivers/ubios_uvb/cis/cis_info_process.h new file mode 100644 index 000000000000..2ddf071699f5 --- /dev/null +++ b/drivers/ubios_uvb/cis/cis_info_process.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: cis info process header + * Author: zhangrui + * Create: 2025-04-18 + */ + +#ifndef CIS_INFO_PROCESS_H +#define CIS_INFO_PROCESS_H + +#include +#include "cis_uvb_interface.h" + +extern struct cis_message *io_param_sync; +extern struct list_head g_local_cis_list; +extern spinlock_t cis_register_lock; + +struct udfi_para { + u32 message_id; + u32 sender_id; + u32 receiver_id; + u32 forwarder_id; + void *input; + u32 input_size; + void *output; + u32 *output_size; +}; + +struct cis_func_node { + struct list_head link; + u32 call_id; + u32 receiver_id; + msg_handler func; +}; + +int init_cis_table(void); +int init_global_vars(void); +void free_global_vars(void); + +int get_cis_group_info(u32 call_id, u32 receiver_id, + u8 *usage, u8 *index, + u32 *exact_receiver_id, u32 *forwarder_id); +int cis_call_remote(u32 call_id, u32 sender_id, u32 receiver_id, + struct cis_message *msg, + bool is_sync); +msg_handler search_my_cis_func(u32 call_id); +msg_handler search_local_cis_func(u32 call_id, u32 receiver_id); + +static inline u32 ubios_get_user_type(u32 user_id) +{ + return user_id & UBIOS_USER_TYPE_MASK; +} +static inline u32 ubios_get_user_index(u32 user_id) +{ + return user_id & UBIOS_USER_INDEX_MASK; +} + +#endif diff --git a/drivers/ubios_uvb/cis/io_param.c b/drivers/ubios_uvb/cis/io_param.c new file mode 100644 index 000000000000..a38bf37ec450 --- /dev/null +++ b/drivers/ubios_uvb/cis/io_param.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: CIS message processing, handles output preparation. + * Author: zhangrui + * Create: 2025-04-18 + */ + +#include +#include +#include "io_param.h" + +void ubios_uvb_free_io_param(struct cis_message *param, u8 free_flag) +{ + if (free_flag == 1 && param->input) + kfree(param->input); + if (free_flag == 1 && param->output) + kfree(param->output); + if (free_flag == 0 && param->input) + memunmap(param->input); + + kfree(param); +} + +void ubios_prepare_output_data(struct cis_message *io_param, void *output, u32 *output_size) +{ + memcpy(output, io_param->output, *(io_param->p_output_size)); + *output_size = *(io_param->p_output_size); +} diff --git a/drivers/ubios_uvb/cis/io_param.h b/drivers/ubios_uvb/cis/io_param.h new file mode 100644 index 000000000000..9c83dc242fad --- /dev/null +++ b/drivers/ubios_uvb/cis/io_param.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
+ * Description: io param header + * Author: zhangrui + * Create: 2025-04-18 + */ + +#ifndef UBIOS_IO_PARAM_H +#define UBIOS_IO_PARAM_H + +#include + +void ubios_uvb_free_io_param(struct cis_message *param, u8 free_flag); +void ubios_prepare_output_data(struct cis_message *io_param, void *output, u32 *output_size); + +#endif diff --git a/drivers/ubios_uvb/cis/uvb_info_process.c b/drivers/ubios_uvb/cis/uvb_info_process.c new file mode 100644 index 000000000000..917990bb1c24 --- /dev/null +++ b/drivers/ubios_uvb/cis/uvb_info_process.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: UVB info processing module, handles init and window polling. + * Author: zhangrui + * Create: 2025-04-18 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cis_info_process.h" +#include "io_param.h" +#include "uvb_info_process.h" + +DECLARE_HASHTABLE(uvb_lock_table, MAX_UVB_LOCK_IN_BITS); + +int cis_call_uvb_sync(u8 index, struct udfi_para *para) +{ + int err; + struct uvb_window *window = NULL; + struct uvb_window_description *wd = NULL; + u64 *wd_obtain = NULL; + + if (!g_uvb_info) { + pr_err(ERR_PRE "uvb unsupported\n"); + return -EOPNOTSUPP; + } + + if (index >= g_uvb_info->uvb_count) { + pr_err(ERR_PRE "cis call sync use uvb index exceed\n"); + return -EOVERFLOW; + } + + pr_info(LOG_PRE "start to do sync cis call by uvb\n"); + wd = uvb_occupy_window(g_uvb_info->uvbs[index], para->sender_id, wd_obtain); + if (!wd) { + pr_err(ERR_PRE "obtain window failed\n"); + err = -EBUSY; + goto free_resources; + } + + window = (struct uvb_window *)memremap(wd->address, sizeof(struct uvb_window), MEMREMAP_WC); + if (!window) { + pr_err(ERR_PRE "convert window addr from pa to va failed\n"); + err = -ENOMEM; + goto free_resources; + } + + err = uvb_fill_window(wd, window, io_param_sync, para); + if (err) { + pr_err(ERR_PRE "fill uvb window failed\n"); + goto free_resources; + } + + err = uvb_poll_window_call_sync(window, para->message_id); + if (err) { + pr_err(ERR_PRE "sync call by uvb failed\n"); + goto free_resources; + } + + err = uvb_get_output_data(window, io_param_sync, para->output, para->output_size); + if (err) + pr_err(ERR_PRE "uvb get output data failed\n"); + +free_resources: + if (wd->buffer && io_param_sync->input) + memunmap(io_param_sync->input); + + memset(io_param_sync, 0, sizeof(struct cis_message)); + + if (window) + uvb_free_window(window); + + if (wd_obtain) { + *wd_obtain = 0; + memunmap(wd_obtain); + } + pr_info(LOG_PRE "finish cis call by uvb sync\n"); + + return err; +} + +int cis_call_uvb(u8 index, struct udfi_para *para) +{ + int err; + struct uvb_window *window = NULL; + struct uvb_window_description *wd = NULL; + struct cis_message *io_param = NULL; + u64 *wd_obtain = NULL; + + if (!g_uvb_info) { + pr_err(ERR_PRE "uvb unsupported\n"); + return -EOPNOTSUPP; + } + + if (index >= g_uvb_info->uvb_count) { + pr_err(ERR_PRE "cis call use uvb index exceed\n"); + return -EOVERFLOW; + } + + pr_info(LOG_PRE "start to do cis call by uvb\n"); + wd = uvb_occupy_window(g_uvb_info->uvbs[index], para->sender_id, wd_obtain); + if (!wd) { + pr_err(ERR_PRE "obtain window failed\n"); + err = -EBUSY; + goto free_resources; + } + + window = (struct uvb_window *)memremap(wd->address, sizeof(struct uvb_window), MEMREMAP_WC); + if (!window) { + pr_err(ERR_PRE "convert 
window addr from pa to va failed\n"); + err = -ENOMEM; + goto free_resources; + } + + io_param = kzalloc(sizeof(struct cis_message), GFP_KERNEL); + if (!io_param) { + err = -ENOMEM; + goto free_resources; + } + err = uvb_fill_window(wd, window, io_param, para); + if (err) { + pr_err(ERR_PRE "fill uvb window failed\n"); + goto free_resources; + } + + err = uvb_poll_window_call(window, para->message_id); + if (err) { + pr_err(ERR_PRE "call by uvb failed\n"); + goto free_resources; + } + + err = uvb_get_output_data(window, io_param, para->output, para->output_size); + if (err) + pr_err(ERR_PRE "uvb get output data failed\n"); + +free_resources: + if (io_param) + ubios_uvb_free_io_param(io_param, (wd->buffer == 0)); + + if (window) { + uvb_free_window(window); + memunmap(window); + } + if (wd_obtain) { + *wd_obtain = 0; + memunmap(wd_obtain); + } + pr_info(LOG_PRE "finish cis call by uvb\n"); + + return err; +} + +/** +Calculate checksum in 4bytes, if size not aligned with 4bytes, padding with 0. +*/ +static u32 checksum32(const void *data, u32 size) +{ + u64 i; + u64 sum = 0; + u32 remainder = size % sizeof(u32); + u32 *p = (u32 *)data; + u32 restsize = size - remainder; + + if (!data) + return (u32)-1; + + for (i = 0; i < restsize; i += sizeof(u32)) { + sum += *p; + p++; + } + + switch (remainder) { + case 1: + sum += (*p) & 0x000000FF; + break; + case 2: + sum += (*p) & 0x0000FFFF; + break; + case 3: + sum += (*p) & 0x00FFFFFF; + break; + default: + break; + } + + return (u32)(sum); +} + +static void free_uvb_window_lock(void) +{ + struct uvb_window_lock *entry; + struct hlist_node *tmp; + u32 bkt; + + if (hash_empty(uvb_lock_table)) + return; + + hash_for_each_safe(uvb_lock_table, bkt, tmp, entry, node) { + hash_del(&entry->node); + kfree(entry); + } +} + +static int uvb_window_lock_init(void) +{ + struct uvb *uvb; + struct uvb_window_lock *lock_node; + u16 i; + u16 j; + + for (i = 0; i < g_uvb_info->uvb_count; i++) { + uvb = g_uvb_info->uvbs[i]; + for (j = 0; j < uvb->window_count; j++) { + lock_node = kzalloc(sizeof(struct uvb_window_lock), GFP_KERNEL); + if (!lock_node) { + free_uvb_window_lock(); + return -ENOMEM; + } + lock_node->lock.counter = 0; + lock_node->window_address = uvb->wd[j].address; + hash_add(uvb_lock_table, &lock_node->node, uvb->wd[j].address); + } + } + pr_info(LOG_PRE "uvb window lock init success.\n"); + + return 0; +} + +static void uvb_return_status(struct uvb_window *window, int status) +{ + window->returned_status = (u32)status; + window->message_id = ~window->message_id; +} + +bool search_local_receiver_id(u32 receiver_id) +{ + bool found = false; + struct cis_func_node *cis_node; + + rcu_read_lock(); + list_for_each_entry_rcu(cis_node, &g_local_cis_list, link) { + if (cis_node->receiver_id == receiver_id) { + found = true; + break; + } + } + rcu_read_unlock(); + + return found; +} + +void uninit_uvb(void) +{ + free_uvb_window_lock(); +} + +int init_uvb(void) +{ + int err = 0; + + if (!g_uvb_info) { + pr_warn(LOG_PRE "uvb is invalid, please try to use smc\n"); + return -EOPNOTSUPP; + } + + err = uvb_window_lock_init(); + if (err) { + pr_err(ERR_PRE "Init uvb window lock failed\n"); + return err; + } + + return err; +} + +static atomic_t *find_uvb_window_lock(u64 window_address) +{ + struct uvb_window_lock *entry; + + if (hash_empty(uvb_lock_table)) + return NULL; + + hash_for_each_possible(uvb_lock_table, entry, node, window_address) { + if (entry->window_address == window_address) + return &entry->lock; + } + + return NULL; +} + +static int 
try_obtain_uvb_window(u64 *wd_obtain, u32 sender_id) +{ + if (*wd_obtain == 0) { + *wd_obtain = sender_id; + return 1; + } + return 0; +} + +struct uvb_window_description *uvb_occupy_window(struct uvb *uvb, u32 sender_id, u64 *wd_obtain) +{ + struct uvb_window_description *wd = NULL; + ktime_t start; + ktime_t now; + atomic_t *lock; + s64 time_interval; + u32 i; + u32 round; + + i = 0; + round = 0; + start = ktime_get(); + while (1) { + if (i >= uvb->window_count) { + i = 0; + round++; + } + wd = &(uvb->wd[i]); + wd_obtain = memremap(wd->obtain, wd->size, MEMREMAP_WC); + if (!wd_obtain) { + pr_err(ERR_PRE "uvb window obtain map failed\n"); + return NULL; + } + lock = find_uvb_window_lock(wd->address); + if (!lock) { + pr_err(ERR_PRE "uvb window lock not found\n"); + goto free_resources; + } + + if (atomic_cmpxchg(lock, 0, 1) == 0 + && try_obtain_uvb_window(wd_obtain, sender_id)) { + atomic_set(lock, 0); + udelay(uvb->delay); + if (*wd_obtain == sender_id) { + now = ktime_get(); + time_interval = ktime_to_us(ktime_sub(now, start)); + pr_info(LOG_PRE "occupy uvb window successfully, elapsed time: %lldus\n", + time_interval); + return wd; + } + } + + now = ktime_get(); + time_interval = ktime_to_us(ktime_sub(now, start)); + if (round > 1 && time_interval > UVB_TIMEOUT_WINDOW_OBTAIN) { + pr_err(ERR_PRE "obtain window timeout, tried %u * %u = %u times\n", + round, (u32)(uvb->window_count), round * (u32)(uvb->window_count)); + goto free_resources; + } + i++; + memunmap(wd_obtain); + wd_obtain = NULL; + } + +free_resources: + memunmap(wd_obtain); + wd_obtain = NULL; + + return NULL; +} + +int uvb_free_window(struct uvb_window *window) +{ + window->input_data_address = 0; + window->input_data_size = 0; + window->input_data_checksum = 0; + + window->output_data_address = 0; + window->output_data_size = 0; + window->output_data_checksum = 0; + window->returned_status = 0; + window->message_id = 0; + + dsb(sy); + isb(); + + window->receiver_id = 0; + window->sender_id = 0; + + return 0; +} + +static int fill_uvb_window_with_buffer(struct uvb_window_description *wd, + struct uvb_window *window_address, + struct cis_message *io_params, + void *input, u32 input_size, + void *output, u32 *output_size) +{ + struct uvb_window *window; + void *new_input = NULL; + void *new_output = NULL; + + window = window_address; + if (input) { + new_input = memremap(wd->buffer, wd->size, MEMREMAP_WC); + if (!new_input) { + pr_err(ERR_PRE "memremap for wd_buffer_virt_addr failed\n"); + return -ENOMEM; + } + memcpy(new_input, input, input_size); + window->input_data_checksum = checksum32(input, input_size); + } + + if (output) + new_output = (void *)(new_input + input_size); + + if (output_size) { + if (wd->size < *output_size + input_size) + return -EOVERFLOW; + window->output_data_size = *output_size; + } + + io_params->input = new_input; + io_params->input_size = input_size; + io_params->output = new_output; + io_params->p_output_size = &(window->output_data_size); + + window->input_data_address = new_input ? wd->buffer : 0; + window->input_data_size = input_size; + window->output_data_address = new_output ? 
wd->buffer + input_size : 0; + + return 0; +} + +static int fill_uvb_window_without_buffer(struct uvb_window *window_address, + struct cis_message *io_params, + void *input, u32 input_size, + void *output, u32 *output_size) +{ + int err = 0; + struct uvb_window *window; + void *input_kloc = NULL; + void *output_kloc = NULL; + + window = window_address; + if (input) { + input_kloc = kzalloc(input_size, GFP_KERNEL); + if (!input_kloc) { + err = -ENOMEM; + goto fail; + } + memcpy(input_kloc, input, input_size); + window->input_data_checksum = checksum32(input, input_size); + } + if (output) { + output_kloc = kzalloc(*output_size, GFP_KERNEL); + if (!output_kloc) { + err = -ENOMEM; + goto fail; + } + memcpy(output_kloc, output, *output_size); + } + if (output_size) + window->output_data_size = *output_size; + + io_params->input = input_kloc; + io_params->input_size = input_size; + io_params->output = output_kloc; + io_params->p_output_size = &(window->output_data_size); + + window->input_data_address = input_kloc ? (u64)virt_to_phys(input_kloc) : 0; + window->input_data_size = input_size; + window->output_data_address = output_kloc ? virt_to_phys(output_kloc) : 0; + return 0; + +fail: + kfree(input_kloc); + kfree(output_kloc); + + return err; +} + +int uvb_fill_window(struct uvb_window_description *wd, struct uvb_window *wd_addr, + struct cis_message *io_params, struct udfi_para *para) +{ + int err; + struct uvb_window *window; + + window = wd_addr; + window->message_id = para->message_id; + window->sender_id = para->sender_id; + + if (wd->buffer == 0) { + err = fill_uvb_window_without_buffer(window, io_params, para->input, + para->input_size, para->output, para->output_size); + if (err) { + pr_err(ERR_PRE "fill uvb window without buffer failed\n"); + goto fail; + } + } else { + err = fill_uvb_window_with_buffer(wd, window, io_params, para->input, + para->input_size, para->output, para->output_size); + if (err) { + pr_err(ERR_PRE "fill uvb window with buffer failed\n"); + goto fail; + } + } + + window->receiver_id = para->receiver_id; + window->forwarder_id = para->forwarder_id; + pr_info(LOG_PRE "uvb fill window success\n"); + + return 0; +fail: + return err; +} + +int uvb_poll_window_call(struct uvb_window *window, u32 call_id) +{ + ktime_t start; + ktime_t now; + s64 time_interval; + + start = ktime_get(); + while (1) { + if (window->message_id == ~call_id) { + pr_info(LOG_PRE "window message id seted to 0x%08x\n", window->message_id); + return (int)window->returned_status; + } + now = ktime_get(); + time_interval = ktime_to_ms(ktime_sub(now, start)); + if (time_interval > UVB_POLL_TIMEOUT) + break; + } + + pr_err(ERR_PRE "uvb poll window call timeout,wait=%lld ms\n", time_interval); + + return -ETIMEDOUT; +} + +int uvb_poll_window_call_sync(struct uvb_window *window, u32 call_id) +{ + int i; + + pr_info(LOG_PRE "start uvb window polling\n"); + for (i = 0; i < UVB_POLL_TIMEOUT_TIMES; i++) { + if (window->message_id == ~call_id) { + pr_info(LOG_PRE "window message id seted to 0x%08x\n", window->message_id); + return (int)window->returned_status; + } + udelay(UVB_POLL_TIME_INTERVAL); + } + + pr_err(ERR_PRE "uvb poll window call sync timeout\n"); + + return -ETIMEDOUT; +} + +int uvb_get_output_data(struct uvb_window *window, + struct cis_message *io_param, void *output, u32 *output_size) +{ + if (!output || !output_size) + return 0; + + if (*output_size == 0) + return 0; + + if (window->output_data_address == 0 || window->output_data_size == UVB_OUTPUT_SIZE_NULL) + return 0; + + if 
(window->output_data_checksum != + checksum32(io_param->output, window->output_data_size)) { + pr_warn(LOG_PRE "returned data checksum error\n"); + return -EINVAL; + } + ubios_prepare_output_data(io_param, output, output_size); + + return 0; +} diff --git a/drivers/ubios_uvb/cis/uvb_info_process.h b/drivers/ubios_uvb/cis/uvb_info_process.h new file mode 100644 index 000000000000..dd2b758af1e7 --- /dev/null +++ b/drivers/ubios_uvb/cis/uvb_info_process.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: uvb info process header + * Author: zhangrui + * Create: 2025-04-18 + */ + +#ifndef UVB_INFO_PROCESS_H +#define UVB_INFO_PROCESS_H + +#include +#include +#include + +#include "cis_uvb_interface.h" + +#define CIS_USAGE_UVB 2 + +#define UVB_POLL_TIME_INTERVAL (100) /* 100us */ +#define UVB_POLL_TIMEOUT (1000) /* 1000ms */ +#define UVB_TIMEOUT_WINDOW_OBTAIN (10000) /* 10000ms */ +#define UVB_POLL_TIMEOUT_TIMES (10000) /* 10000 times */ + +int init_uvb(void); +void uninit_uvb(void); +void uninit_uvb_sync(void); + +#define MAX_UVB_LOCK_IN_BITS 8 +struct uvb_window_lock { + atomic_t lock; + u64 window_address; + struct hlist_node node; +}; +extern DECLARE_HASHTABLE(uvb_lock_table, MAX_UVB_LOCK_IN_BITS); + +struct uvb_window_description *uvb_occupy_window(struct uvb *uvb, u32 sender_id, u64 *wd_obtain); +int uvb_free_window(struct uvb_window *window); +int uvb_fill_window(struct uvb_window_description *wd, struct uvb_window *wd_addr, + struct cis_message *io_params, struct udfi_para *para); +int uvb_poll_window_call(struct uvb_window *window, u32 call_id); +int uvb_poll_window_call_sync(struct uvb_window *window, u32 call_id); +int uvb_get_output_data(struct uvb_window *window, + struct cis_message *io_param, void *output, u32 *output_size); + +/* cis call by uvb */ +int cis_call_uvb(u8 index, struct udfi_para *para); +int cis_call_uvb_sync(u8 index, struct udfi_para *para); +#endif diff --git a/include/ubios/cis.h b/include/ubios/cis.h new file mode 100644 index 000000000000..d8d48c9b5762 --- /dev/null +++ b/include/ubios/cis.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
+ * Description: cis header
+ * Create: 2025-04-18
+ */
+
+#ifndef CIS_H
+#define CIS_H
+#include <linux/types.h>
+
+// Call ID
+#define UBIOS_CALL_ID_FLAG 0x3
+#define UBIOS_CALL_ID_PANIC_CALL 0xc00b2010
+#define UBIOS_CALL_ID_GET_DEVICE_INFO 0xc00b0b26
+
+// User ID format
+#define UBIOS_USER_ID_NO (0x00 << 24)
+#define UBIOS_USER_ID_BIOS (0x01 << 24)
+#define UBIOS_USER_ID_BMC (0x0B << 24)
+#define UBIOS_USER_ID_UB_DEVICE (0x10 << 24)
+#define UBIOS_USER_ID_INTERGRATED_UB_DEVICE (0x11 << 24)
+#define UBIOS_USER_ID_RICH_OS (0x20 << 24)
+#define UBIOS_USER_ID_TRUST_OS (0x30 << 24)
+#define UBIOS_USER_ID_PCIE_DEVICE (0x40 << 24)
+#define UBIOS_USER_ID_INTERGRATED_PCIE_DEVICE (0x41 << 24)
+#define UBIOS_USER_ID_ALL (0xFF << 24)
+#define UBIOS_USER_TYPE_MASK UBIOS_USER_ID_ALL
+#define UBIOS_USER_INDEX_MASK ((u32)(~UBIOS_USER_TYPE_MASK))
+
+#define UBIOS_MY_USER_ID UBIOS_USER_ID_INTERGRATED_UB_DEVICE
+#define UBIOS_GET_MESSAGE_FLAG(user_id) ((u32)((user_id) >> 30))
+
+struct cis_message {
+	void *input;
+	u32 input_size;
+	void *output;
+	u32 *p_output_size;
+};
+
+// cis call
+int cis_call_by_uvb(u32 call_id, u32 sender_id,
+		    u32 receiver_id, struct cis_message *msg, bool is_sync);
+
+// cis register
+typedef int (*msg_handler)(struct cis_message *msg);
+int register_local_cis_func(u32 call_id, u32 receiver_id, msg_handler func);
+int register_my_cis_func(u32 call_id, msg_handler func);
+int unregister_local_cis_func(u32 call_id, u32 receiver_id);
+int unregister_my_cis_func(u32 call_id);
+int cis_module_lock_func(int lock);
+
+#endif
-- 
Gitee

From 98187eb8bebd246b21b74e424b4ea1418c1cb043 Mon Sep 17 00:00:00 2001
From: klmengkd
Date: Tue, 4 Nov 2025 21:32:37 +0800
Subject: [PATCH 04/48] ubios_uvb: support CIS framework receive function

commit c477b8f076e2b3385ad93a517136b0066a691813 openEuler

This patch adds the CIS framework receive function. cis creates a
thread to poll the uvb window; when data arrives, the poll thread
handles it and passes it to the registered callback function.
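In outline the receive side behaves like the sketch below (a simplified
sketch, not the literal patch code: example_poll_one_window() is a
hypothetical helper standing in for the real window scan, and the ack
mirrors the send side, which polls for message_id to be complemented):

  static int example_uvb_poll_window(void *data)
  {
  	while (!kthread_should_stop()) {
  		/* hypothetical: next window holding a request for us */
  		struct uvb_window *win = example_poll_one_window();

  		if (win) {
  			struct cis_message msg = { 0 }; /* built from win's buffers */
  			msg_handler func = search_local_cis_func(win->message_id,
  								 win->receiver_id);

  			win->returned_status = func ? (u32)func(&msg) : (u32)-EOPNOTSUPP;
  			win->message_id = ~win->message_id; /* ack: sender polls for this */
  		}
  		usleep_range(100, 200);
  	}

  	return 0;
  }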
Signed-off-by: Anonymous_Z Signed-off-by: klmengkd --- arch/arm64/configs/tencent.config | 4 + drivers/Kconfig | 1 - drivers/Makefile | 1 - drivers/firmware/Kconfig | 1 + drivers/firmware/Makefile | 1 + drivers/{ => firmware}/ubios_uvb/Kconfig | 0 .../cis => firmware/ubios_uvb}/Makefile | 15 +- drivers/firmware/ubios_uvb/cis/cis_core.c | 180 +++++ .../ubios_uvb/cis/cis_info_process.c} | 749 ++++++++++-------- .../ubios_uvb/cis/cis_info_process.h | 22 +- .../firmware/ubios_uvb/cis/uvb_info_process.c | 359 +++++++++ .../ubios_uvb/cis/uvb_info_process.h} | 13 +- .../ubios_uvb/include/cis_uvb_interface.h | 5 +- .../{ => firmware}/ubios_uvb/odf/odf_data.c | 38 +- .../ubios_uvb/odf/odf_get_fdt.c | 0 .../ubios_uvb/odf/odf_handle.h} | 4 +- .../{ => firmware}/ubios_uvb/odf/odf_helper.c | 75 +- .../ubios_uvb/odf}/odf_interface.h | 57 +- .../{ => firmware}/ubios_uvb/odf/odf_trans.c | 166 ++-- drivers/ubios_uvb/Makefile | 6 - drivers/ubios_uvb/cis/cis_core.c | 176 ---- drivers/ubios_uvb/cis/cis_info_process.c | 240 ------ drivers/ubios_uvb/cis/io_param.c | 29 - drivers/ubios_uvb/cis/uvb_info_process.h | 49 -- drivers/ubios_uvb/odf/Makefile | 10 - drivers/ubios_uvb/odf/include/libodf.h | 38 - drivers/ubios_uvb/odf/include/odf_trans.h | 34 - drivers/ubios_uvb/odf/odf_file.c | 76 -- include/{ => linux/firmware}/ubios/cis.h | 4 +- 29 files changed, 1232 insertions(+), 1121 deletions(-) rename drivers/{ => firmware}/ubios_uvb/Kconfig (100%) rename drivers/{ubios_uvb/cis => firmware/ubios_uvb}/Makefile (31%) create mode 100644 drivers/firmware/ubios_uvb/cis/cis_core.c rename drivers/{ubios_uvb/cis/uvb_info_process.c => firmware/ubios_uvb/cis/cis_info_process.c} (42%) rename drivers/{ => firmware}/ubios_uvb/cis/cis_info_process.h (64%) create mode 100644 drivers/firmware/ubios_uvb/cis/uvb_info_process.c rename drivers/{ubios_uvb/cis/io_param.h => firmware/ubios_uvb/cis/uvb_info_process.h} (39%) rename drivers/{ => firmware}/ubios_uvb/include/cis_uvb_interface.h (96%) rename drivers/{ => firmware}/ubios_uvb/odf/odf_data.c (94%) rename drivers/{ => firmware}/ubios_uvb/odf/odf_get_fdt.c (100%) rename drivers/{ubios_uvb/odf/include/libodf_handle.h => firmware/ubios_uvb/odf/odf_handle.h} (99%) rename drivers/{ => firmware}/ubios_uvb/odf/odf_helper.c (76%) rename drivers/{ubios_uvb/include => firmware/ubios_uvb/odf}/odf_interface.h (46%) rename drivers/{ => firmware}/ubios_uvb/odf/odf_trans.c (73%) delete mode 100644 drivers/ubios_uvb/Makefile delete mode 100644 drivers/ubios_uvb/cis/cis_core.c delete mode 100644 drivers/ubios_uvb/cis/cis_info_process.c delete mode 100644 drivers/ubios_uvb/cis/io_param.c delete mode 100644 drivers/ubios_uvb/cis/uvb_info_process.h delete mode 100644 drivers/ubios_uvb/odf/Makefile delete mode 100644 drivers/ubios_uvb/odf/include/libodf.h delete mode 100644 drivers/ubios_uvb/odf/include/odf_trans.h delete mode 100644 drivers/ubios_uvb/odf/odf_file.c rename include/{ => linux/firmware}/ubios/cis.h (92%) diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config index 4268a2642279..05f77ddfcb27 100644 --- a/arch/arm64/configs/tencent.config +++ b/arch/arm64/configs/tencent.config @@ -1804,6 +1804,10 @@ CONFIG_ARM_SMMU_V3_HTTU=y CONFIG_ARM_SMMU_V3_ECMDQ=y CONFIG_ARM_SMMU_V3_IOMMUFD=y +CONFIG_UDFI=y +CONFIG_UDFI_CIS=m +CONFIG_UDFI_ODF=m + # # unified bus # diff --git a/drivers/Kconfig b/drivers/Kconfig index 10fa9c700a9d..bfb2bdb00477 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -248,5 +248,4 @@ source "drivers/cpuinspect/Kconfig" source 
"drivers/thirdparty/Kconfig" -source "drivers/ubios_uvb/Kconfig" endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 01a48436281d..800793aafbbb 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -201,4 +201,3 @@ obj-$(CONFIG_DRM_ACCEL) += accel/ obj-$(CONFIG_CDX_BUS) += cdx/ obj-$(CONFIG_S390) += s390/ -obj-$(CONFIG_UDFI) += ubios_uvb/ diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 11e8d19658aa..68edaa3d6846 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -314,5 +314,6 @@ source "drivers/firmware/psci/Kconfig" source "drivers/firmware/smccc/Kconfig" source "drivers/firmware/tegra/Kconfig" source "drivers/firmware/xilinx/Kconfig" +source "drivers/firmware/ubios_uvb/Kconfig" endmenu diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 28fcddcd688f..c60933fa11cb 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -38,3 +38,4 @@ obj-y += psci/ obj-y += smccc/ obj-y += tegra/ obj-y += xilinx/ +obj-$(CONFIG_UDFI) += ubios_uvb/ diff --git a/drivers/ubios_uvb/Kconfig b/drivers/firmware/ubios_uvb/Kconfig similarity index 100% rename from drivers/ubios_uvb/Kconfig rename to drivers/firmware/ubios_uvb/Kconfig diff --git a/drivers/ubios_uvb/cis/Makefile b/drivers/firmware/ubios_uvb/Makefile similarity index 31% rename from drivers/ubios_uvb/cis/Makefile rename to drivers/firmware/ubios_uvb/Makefile index b41b0c361b47..ccb8b026c819 100644 --- a/drivers/ubios_uvb/cis/Makefile +++ b/drivers/firmware/ubios_uvb/Makefile @@ -1,9 +1,16 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. # Create : 2025-04-18 -# Description : cis Makefile +# Description : cis odf Makefile -obj-$(CONFIG_UDFI_CIS) += cis.o +ccflags-y += -I$(srctree)/drivers/firmware/ubios_uvb/include -cis-objs := cis_info_process.o uvb_info_process.o cis_core.o io_param.o +obj-$(CONFIG_UDFI) = odf/odf_get_fdt.o +obj-$(CONFIG_UDFI_ODF) += odf.o +odf-objs := odf/odf_trans.o \ + odf/odf_data.o \ + odf/odf_helper.o -ccflags-y += -I$(srctree)/$(src)/../include +obj-$(CONFIG_UDFI_CIS) += cis.o +cis-objs := cis/cis_info_process.o \ + cis/uvb_info_process.o \ + cis/cis_core.o diff --git a/drivers/firmware/ubios_uvb/cis/cis_core.c b/drivers/firmware/ubios_uvb/cis/cis_core.c new file mode 100644 index 000000000000..d1e5938b306c --- /dev/null +++ b/drivers/firmware/ubios_uvb/cis/cis_core.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * Description: Call ID Service (CIS) core module, manages inter-process communication + * via call identifiers with local/remote handling and UVB integration. 
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+#define pr_fmt(fmt) "[UVB]: " fmt
+
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/hashtable.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include "cis_info_process.h"
+#include "uvb_info_process.h"
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Call ID Service Framework");
+
+static struct task_struct *uvb_poll_window_thread;
+DECLARE_HASHTABLE(uvb_lock_table, MAX_UVB_LOCK_IN_BITS);
+
+int create_uvb_poll_window_thread(void)
+{
+	uvb_poll_window_thread = kthread_run(uvb_poll_window, NULL, "uvb_poll_window_thread");
+	if (IS_ERR(uvb_poll_window_thread)) {
+		pr_err("Failed to create uvb polling thread\n");
+		return PTR_ERR(uvb_poll_window_thread);
+	}
+
+	pr_info("create uvb poll window thread successfully\n");
+
+	return 0;
+}
+
+void uvb_poll_window_thread_stop(void)
+{
+	if (uvb_poll_window_thread) {
+		kthread_stop(uvb_poll_window_thread);
+		uvb_poll_window_thread = NULL;
+	}
+}
+
+static void free_uvb_window_lock(void)
+{
+	struct uvb_window_lock *entry;
+	struct hlist_node *tmp;
+	u32 bkt;
+
+	if (hash_empty(uvb_lock_table))
+		return;
+
+	hash_for_each_safe(uvb_lock_table, bkt, tmp, entry, node) {
+		hash_del(&entry->node);
+		kfree(entry);
+	}
+}
+
+static int uvb_window_lock_init(void)
+{
+	struct uvb *uvb;
+	struct uvb_window_lock *lock_node;
+	u16 i;
+	u16 j;
+
+	for (i = 0; i < g_uvb_info->uvb_count; i++) {
+		uvb = g_uvb_info->uvbs[i];
+		for (j = 0; j < uvb->window_count; j++) {
+			lock_node = kzalloc(sizeof(struct uvb_window_lock), GFP_KERNEL);
+			if (!lock_node) {
+				free_uvb_window_lock();
+				return -ENOMEM;
+			}
+			lock_node->lock.counter = 0;
+			lock_node->window_address = uvb->wd[j].address;
+			hash_add(uvb_lock_table, &lock_node->node, uvb->wd[j].address);
+		}
+	}
+	pr_info("uvb window lock init success.\n");
+
+	return 0;
+}
+
+int init_uvb(void)
+{
+	int err = 0;
+
+	if (!g_uvb_info) {
+		pr_err("uvb is invalid, please try to use smc\n");
+		return -EOPNOTSUPP;
+	}
+
+	err = uvb_window_lock_init();
+	if (err) {
+		pr_err("Init uvb window lock failed\n");
+		return err;
+	}
+
+	err = create_uvb_poll_window_thread();
+	if (err) {
+		pr_err("create uvb poll thread failed, err=%d\n", err);
+		free_uvb_window_lock();
+		return err;
+	}
+
+	return 0;
+}
+
+int init_global_vars(void)
+{
+	io_param_sync = kzalloc(sizeof(struct cis_message), GFP_KERNEL);
+	if (!io_param_sync)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int init_cis_table(void)
+{
+	if (!g_cis_info) {
+		pr_err("failed to get cis info from odf\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+void free_global_vars(void)
+{
+	kfree(io_param_sync);
+	io_param_sync = NULL;
+}
+
+void uninit_uvb(void)
+{
+	uvb_poll_window_thread_stop();
+	msleep(UVB_POLL_TIMEOUT);
+	free_uvb_window_lock();
+}
+
+static int __init cis_init(void)
+{
+	int err = 0;
+
+	err = init_cis_table();
+	if (err) {
+		pr_err("cis info init failed, err=%d\n", err);
+		return err;
+	}
+
+	err = init_global_vars();
+	if (err) {
+		pr_err("global vars malloc failed, err=%d\n", err);
+		return err;
+	}
+
+	err = init_uvb();
+	if (err) {
+		pr_err("uvb init failed, err=%d\n", err);
+		free_global_vars();
+		return err;
+	}
+
+	pr_info("cis init success\n");
+
+	return 0;
+}
+
+static void __exit cis_exit(void)
+{
+	uninit_uvb();
+	free_global_vars();
+	pr_info("cis exit success\n");
+}
+
+module_init(cis_init);
+module_exit(cis_exit);
+
diff --git a/drivers/ubios_uvb/cis/uvb_info_process.c b/drivers/firmware/ubios_uvb/cis/cis_info_process.c
similarity index 42%
rename from drivers/ubios_uvb/cis/uvb_info_process.c
rename to drivers/firmware/ubios_uvb/cis/cis_info_process.c
index
917990bb1c24..eb87fa528228 100644 --- a/drivers/ubios_uvb/cis/uvb_info_process.c +++ b/drivers/firmware/ubios_uvb/cis/cis_info_process.c @@ -1,292 +1,139 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - * Description: UVB info processing module, handles init and window polling. + * Description: Call ID Service (CIS) info processing module, handles CIS init, + * func register/lookup and group info retrieval. * Author: zhangrui * Create: 2025-04-18 */ +#define pr_fmt(fmt) "[UVB]: " fmt -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include - +#include #include "cis_info_process.h" -#include "io_param.h" #include "uvb_info_process.h" -DECLARE_HASHTABLE(uvb_lock_table, MAX_UVB_LOCK_IN_BITS); +LIST_HEAD(g_local_cis_list); +DEFINE_SPINLOCK(cis_register_lock); +struct cis_message *io_param_sync; -int cis_call_uvb_sync(u8 index, struct udfi_para *para) +void ubios_prepare_output_data(struct cis_message *io_param, void *output, u32 *output_size) { - int err; - struct uvb_window *window = NULL; - struct uvb_window_description *wd = NULL; - u64 *wd_obtain = NULL; - - if (!g_uvb_info) { - pr_err(ERR_PRE "uvb unsupported\n"); - return -EOPNOTSUPP; - } - - if (index >= g_uvb_info->uvb_count) { - pr_err(ERR_PRE "cis call sync use uvb index exceed\n"); - return -EOVERFLOW; - } - - pr_info(LOG_PRE "start to do sync cis call by uvb\n"); - wd = uvb_occupy_window(g_uvb_info->uvbs[index], para->sender_id, wd_obtain); - if (!wd) { - pr_err(ERR_PRE "obtain window failed\n"); - err = -EBUSY; - goto free_resources; - } - - window = (struct uvb_window *)memremap(wd->address, sizeof(struct uvb_window), MEMREMAP_WC); - if (!window) { - pr_err(ERR_PRE "convert window addr from pa to va failed\n"); - err = -ENOMEM; - goto free_resources; - } - - err = uvb_fill_window(wd, window, io_param_sync, para); - if (err) { - pr_err(ERR_PRE "fill uvb window failed\n"); - goto free_resources; - } - - err = uvb_poll_window_call_sync(window, para->message_id); - if (err) { - pr_err(ERR_PRE "sync call by uvb failed\n"); - goto free_resources; - } - - err = uvb_get_output_data(window, io_param_sync, para->output, para->output_size); - if (err) - pr_err(ERR_PRE "uvb get output data failed\n"); - -free_resources: - if (wd->buffer && io_param_sync->input) - memunmap(io_param_sync->input); - - memset(io_param_sync, 0, sizeof(struct cis_message)); + memcpy(output, io_param->output, *(io_param->p_output_size)); + *output_size = *(io_param->p_output_size); +} - if (window) - uvb_free_window(window); +static bool is_call_id_supported(struct cis_group *group, u32 call_id) +{ + u32 i; - if (wd_obtain) { - *wd_obtain = 0; - memunmap(wd_obtain); + for (i = 0; i < group->cis_count; i++) { + pr_debug("cia call_id: %08x\n", group->call_id[i]); + if (group->call_id[i] == call_id) + return true; } - pr_info(LOG_PRE "finish cis call by uvb sync\n"); - return err; + return false; } -int cis_call_uvb(u8 index, struct udfi_para *para) +int get_cis_group_info(u32 call_id, u32 receiver_id, + u8 *usage, u8 *index, + u32 *exact_receiver_id, u32 *forwarder_id) { - int err; - struct uvb_window *window = NULL; - struct uvb_window_description *wd = NULL; - struct cis_message *io_param = NULL; - u64 *wd_obtain = NULL; + u32 i; - if (!g_uvb_info) { - pr_err(ERR_PRE "uvb unsupported\n"); + if (!g_cis_info) { + pr_err("can't get cis_info 
from odf\n"); return -EOPNOTSUPP; } - if (index >= g_uvb_info->uvb_count) { - pr_err(ERR_PRE "cis call use uvb index exceed\n"); - return -EOVERFLOW; - } - - pr_info(LOG_PRE "start to do cis call by uvb\n"); - wd = uvb_occupy_window(g_uvb_info->uvbs[index], para->sender_id, wd_obtain); - if (!wd) { - pr_err(ERR_PRE "obtain window failed\n"); - err = -EBUSY; - goto free_resources; - } - - window = (struct uvb_window *)memremap(wd->address, sizeof(struct uvb_window), MEMREMAP_WC); - if (!window) { - pr_err(ERR_PRE "convert window addr from pa to va failed\n"); - err = -ENOMEM; - goto free_resources; - } - - io_param = kzalloc(sizeof(struct cis_message), GFP_KERNEL); - if (!io_param) { - err = -ENOMEM; - goto free_resources; - } - err = uvb_fill_window(wd, window, io_param, para); - if (err) { - pr_err(ERR_PRE "fill uvb window failed\n"); - goto free_resources; + for (i = 0; i < g_cis_info->group_count; i++) { + if (receiver_id != g_cis_info->groups[i]->owner_user_id && + receiver_id != ubios_get_user_type(g_cis_info->groups[i]->owner_user_id)) + continue; + if (is_call_id_supported(g_cis_info->groups[i], call_id)) { + *usage = g_cis_info->groups[i]->usage; + *index = g_cis_info->groups[i]->index; + *exact_receiver_id = g_cis_info->groups[i]->owner_user_id; + *forwarder_id = g_cis_info->groups[i]->forwarder_id; + return 0; + } } - err = uvb_poll_window_call(window, para->message_id); - if (err) { - pr_err(ERR_PRE "call by uvb failed\n"); - goto free_resources; + if (ubios_get_user_type(receiver_id) == UBIOS_USER_ID_UB_DEVICE) { + *usage = g_cis_info->ub.usage; + *index = g_cis_info->ub.index; + *exact_receiver_id = receiver_id; + *forwarder_id = g_cis_info->ub.forwarder_id; + pr_info("refresh info, usage=%d, index=%d, forward_id=%08x\n", + *usage, *index, *forwarder_id); + return 0; } - err = uvb_get_output_data(window, io_param, para->output, para->output_size); - if (err) - pr_err(ERR_PRE "uvb get output data failed\n"); + pr_err("call id: %08x not supported\n", call_id); -free_resources: - if (io_param) - ubios_uvb_free_io_param(io_param, (wd->buffer == 0)); - - if (window) { - uvb_free_window(window); - memunmap(window); - } - if (wd_obtain) { - *wd_obtain = 0; - memunmap(wd_obtain); - } - pr_info(LOG_PRE "finish cis call by uvb\n"); - - return err; + return -EOPNOTSUPP; } -/** -Calculate checksum in 4bytes, if size not aligned with 4bytes, padding with 0. +/* +Search Call ID Service owned by this component, return the function. 
*/ -static u32 checksum32(const void *data, u32 size) +struct cis_func_node *search_local_cis_func_node(u32 call_id, u32 receiver_id) { - u64 i; - u64 sum = 0; - u32 remainder = size % sizeof(u32); - u32 *p = (u32 *)data; - u32 restsize = size - remainder; - - if (!data) - return (u32)-1; - - for (i = 0; i < restsize; i += sizeof(u32)) { - sum += *p; - p++; - } - - switch (remainder) { - case 1: - sum += (*p) & 0x000000FF; - break; - case 2: - sum += (*p) & 0x0000FFFF; - break; - case 3: - sum += (*p) & 0x00FFFFFF; - break; - default: - break; - } - - return (u32)(sum); -} + struct cis_func_node *cis_node = NULL; + struct cis_func_node *tmp; -static void free_uvb_window_lock(void) -{ - struct uvb_window_lock *entry; - struct hlist_node *tmp; - u32 bkt; - - if (hash_empty(uvb_lock_table)) - return; - - hash_for_each_safe(uvb_lock_table, bkt, tmp, entry, node) { - hash_del(&entry->node); - kfree(entry); - } -} - -static int uvb_window_lock_init(void) -{ - struct uvb *uvb; - struct uvb_window_lock *lock_node; - u16 i; - u16 j; - - for (i = 0; i < g_uvb_info->uvb_count; i++) { - uvb = g_uvb_info->uvbs[i]; - for (j = 0; j < uvb->window_count; j++) { - lock_node = kzalloc(sizeof(struct uvb_window_lock), GFP_KERNEL); - if (!lock_node) { - free_uvb_window_lock(); - return -ENOMEM; - } - lock_node->lock.counter = 0; - lock_node->window_address = uvb->wd[j].address; - hash_add(uvb_lock_table, &lock_node->node, uvb->wd[j].address); + rcu_read_lock(); + list_for_each_entry_rcu(tmp, &g_local_cis_list, link) { + if ((tmp->call_id == call_id) && (tmp->receiver_id == receiver_id)) { + cis_node = tmp; + break; } } - pr_info(LOG_PRE "uvb window lock init success.\n"); - - return 0; -} + rcu_read_unlock(); -static void uvb_return_status(struct uvb_window *window, int status) -{ - window->returned_status = (u32)status; - window->message_id = ~window->message_id; + return cis_node; } -bool search_local_receiver_id(u32 receiver_id) +/* +Search local Call ID Service Functon according Call ID, return the function. 
+*/ +msg_handler search_local_cis_func(u32 call_id, u32 receiver_id) { - bool found = false; struct cis_func_node *cis_node; - rcu_read_lock(); - list_for_each_entry_rcu(cis_node, &g_local_cis_list, link) { - if (cis_node->receiver_id == receiver_id) { - found = true; - break; - } - } - rcu_read_unlock(); + cis_node = search_local_cis_func_node(call_id, receiver_id); + if (cis_node) + return cis_node->func; - return found; + return NULL; } -void uninit_uvb(void) +static bool cis_call_for_me(u32 receiver_id) { - free_uvb_window_lock(); + if ((receiver_id == UBIOS_USER_ID_ALL) || + (receiver_id == ubios_get_user_type(UBIOS_MY_USER_ID)) || + (receiver_id == UBIOS_MY_USER_ID)) { + return true; + } + + return false; } -int init_uvb(void) +static bool cis_call_for_local(u32 receiver_id) { - int err = 0; - - if (!g_uvb_info) { - pr_warn(LOG_PRE "uvb is invalid, please try to use smc\n"); - return -EOPNOTSUPP; + if ((ubios_get_user_type(receiver_id) == UBIOS_USER_ID_INTERGRATED_UB_DEVICE) || + (ubios_get_user_type(receiver_id) == UBIOS_USER_ID_INTERGRATED_PCIE_DEVICE)) { + return true; } - err = uvb_window_lock_init(); - if (err) { - pr_err(ERR_PRE "Init uvb window lock failed\n"); - return err; - } - - return err; + return false; } static atomic_t *find_uvb_window_lock(u64 window_address) @@ -334,12 +181,12 @@ struct uvb_window_description *uvb_occupy_window(struct uvb *uvb, u32 sender_id, wd = &(uvb->wd[i]); wd_obtain = memremap(wd->obtain, wd->size, MEMREMAP_WC); if (!wd_obtain) { - pr_err(ERR_PRE "uvb window obtain map failed\n"); + pr_err("uvb window obtain map failed\n"); return NULL; } lock = find_uvb_window_lock(wd->address); if (!lock) { - pr_err(ERR_PRE "uvb window lock not found\n"); + pr_err("uvb window lock not found\n"); goto free_resources; } @@ -350,7 +197,7 @@ struct uvb_window_description *uvb_occupy_window(struct uvb *uvb, u32 sender_id, if (*wd_obtain == sender_id) { now = ktime_get(); time_interval = ktime_to_us(ktime_sub(now, start)); - pr_info(LOG_PRE "occupy uvb window successfully, elapsed time: %lldus\n", + pr_info("occupy uvb window successfully, elapsed time: %lldus\n", time_interval); return wd; } @@ -359,7 +206,7 @@ struct uvb_window_description *uvb_occupy_window(struct uvb *uvb, u32 sender_id, now = ktime_get(); time_interval = ktime_to_us(ktime_sub(now, start)); if (round > 1 && time_interval > UVB_TIMEOUT_WINDOW_OBTAIN) { - pr_err(ERR_PRE "obtain window timeout, tried %u * %u = %u times\n", + pr_err("obtain window timeout, tried %u * %u = %u times\n", round, (u32)(uvb->window_count), round * (u32)(uvb->window_count)); goto free_resources; } @@ -375,8 +222,18 @@ struct uvb_window_description *uvb_occupy_window(struct uvb *uvb, u32 sender_id, return NULL; } +void uvb_free_wd_obtain(u64 *wd_obtain) +{ + if (!wd_obtain) + return; + *wd_obtain = 0; + memunmap(wd_obtain); +} + int uvb_free_window(struct uvb_window *window) { + if (!window) + return 0; window->input_data_address = 0; window->input_data_size = 0; window->input_data_checksum = 0; @@ -407,10 +264,16 @@ static int fill_uvb_window_with_buffer(struct uvb_window_description *wd, void *new_output = NULL; window = window_address; + if (output_size) { + if (wd->size < *output_size + input_size) + return -EOVERFLOW; + window->output_data_size = *output_size; + } + if (input) { new_input = memremap(wd->buffer, wd->size, MEMREMAP_WC); if (!new_input) { - pr_err(ERR_PRE "memremap for wd_buffer_virt_addr failed\n"); + pr_err("memremap for wd_buffer_virt_addr failed\n"); return -ENOMEM; } memcpy(new_input, input, 
input_size); @@ -420,12 +283,6 @@ static int fill_uvb_window_with_buffer(struct uvb_window_description *wd, if (output) new_output = (void *)(new_input + input_size); - if (output_size) { - if (wd->size < *output_size + input_size) - return -EOVERFLOW; - window->output_data_size = *output_size; - } - io_params->input = new_input; io_params->input_size = input_size; io_params->output = new_output; @@ -438,54 +295,6 @@ static int fill_uvb_window_with_buffer(struct uvb_window_description *wd, return 0; } -static int fill_uvb_window_without_buffer(struct uvb_window *window_address, - struct cis_message *io_params, - void *input, u32 input_size, - void *output, u32 *output_size) -{ - int err = 0; - struct uvb_window *window; - void *input_kloc = NULL; - void *output_kloc = NULL; - - window = window_address; - if (input) { - input_kloc = kzalloc(input_size, GFP_KERNEL); - if (!input_kloc) { - err = -ENOMEM; - goto fail; - } - memcpy(input_kloc, input, input_size); - window->input_data_checksum = checksum32(input, input_size); - } - if (output) { - output_kloc = kzalloc(*output_size, GFP_KERNEL); - if (!output_kloc) { - err = -ENOMEM; - goto fail; - } - memcpy(output_kloc, output, *output_size); - } - if (output_size) - window->output_data_size = *output_size; - - io_params->input = input_kloc; - io_params->input_size = input_size; - io_params->output = output_kloc; - io_params->p_output_size = &(window->output_data_size); - - window->input_data_address = input_kloc ? (u64)virt_to_phys(input_kloc) : 0; - window->input_data_size = input_size; - window->output_data_address = output_kloc ? virt_to_phys(output_kloc) : 0; - return 0; - -fail: - kfree(input_kloc); - kfree(output_kloc); - - return err; -} - int uvb_fill_window(struct uvb_window_description *wd, struct uvb_window *wd_addr, struct cis_message *io_params, struct udfi_para *para) { @@ -496,29 +305,19 @@ int uvb_fill_window(struct uvb_window_description *wd, struct uvb_window *wd_add window->message_id = para->message_id; window->sender_id = para->sender_id; - if (wd->buffer == 0) { - err = fill_uvb_window_without_buffer(window, io_params, para->input, - para->input_size, para->output, para->output_size); - if (err) { - pr_err(ERR_PRE "fill uvb window without buffer failed\n"); - goto fail; - } - } else { - err = fill_uvb_window_with_buffer(wd, window, io_params, para->input, - para->input_size, para->output, para->output_size); - if (err) { - pr_err(ERR_PRE "fill uvb window with buffer failed\n"); - goto fail; - } + err = fill_uvb_window_with_buffer(wd, window, io_params, para->input, + para->input_size, para->output, para->output_size); + if (err) { + pr_err("fill uvb window with buffer failed\n"); + uvb_free_window(window); + return err; } window->receiver_id = para->receiver_id; window->forwarder_id = para->forwarder_id; - pr_info(LOG_PRE "uvb fill window success\n"); + pr_info("uvb fill window success\n"); return 0; -fail: - return err; } int uvb_poll_window_call(struct uvb_window *window, u32 call_id) @@ -529,17 +328,16 @@ int uvb_poll_window_call(struct uvb_window *window, u32 call_id) start = ktime_get(); while (1) { - if (window->message_id == ~call_id) { - pr_info(LOG_PRE "window message id seted to 0x%08x\n", window->message_id); + if (window->message_id == ~call_id) return (int)window->returned_status; - } + now = ktime_get(); time_interval = ktime_to_ms(ktime_sub(now, start)); if (time_interval > UVB_POLL_TIMEOUT) break; } - pr_err(ERR_PRE "uvb poll window call timeout,wait=%lld ms\n", time_interval); + pr_err("uvb poll window 
call timeout, wait=%lld ms\n", time_interval);
 
 	return -ETIMEDOUT;
 }
 
@@ -548,16 +346,15 @@ int uvb_poll_window_call_sync(struct uvb_window *window, u32 call_id)
 {
 	int i;
 
-	pr_info(LOG_PRE "start uvb window polling\n");
+	pr_info("start uvb window polling\n");
 	for (i = 0; i < UVB_POLL_TIMEOUT_TIMES; i++) {
-		if (window->message_id == ~call_id) {
-			pr_info(LOG_PRE "window message id seted to 0x%08x\n", window->message_id);
+		if (window->message_id == ~call_id)
 			return (int)window->returned_status;
-		}
+
 		udelay(UVB_POLL_TIME_INTERVAL);
 	}
-	pr_err(ERR_PRE "uvb poll window call sync timeout\n");
+	pr_err("uvb poll window call sync timeout\n");
 
 	return -ETIMEDOUT;
 }
 
@@ -576,10 +373,318 @@ int uvb_get_output_data(struct uvb_window *window,
 
 	if (window->output_data_checksum !=
 	    checksum32(io_param->output, window->output_data_size)) {
-		pr_warn(LOG_PRE "returned data checksum error\n");
+		pr_warn("returned data checksum error\n");
 		return -EINVAL;
 	}
 
 	ubios_prepare_output_data(io_param, output, output_size);
 
 	return 0;
 }
+
+void free_io_param_with_buffer(struct cis_message *io_param)
+{
+	if (!io_param)
+		return;
+
+	if (io_param->input)
+		memunmap(io_param->input);
+	kfree(io_param);
+}
+
+int cis_call_uvb(u8 index, struct udfi_para *para)
+{
+	int err;
+	struct uvb_window *window = NULL;
+	struct uvb_window_description *wd = NULL;
+	struct cis_message *io_param = NULL;
+	u64 *wd_obtain = NULL;
+
+	if (!g_uvb_info) {
+		pr_err("uvb unsupported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (index >= g_uvb_info->uvb_count) {
+		pr_err("cis call uvb index exceeds uvb count\n");
+		return -EOVERFLOW;
+	}
+
+	wd = uvb_occupy_window(g_uvb_info->uvbs[index], para->sender_id, wd_obtain);
+	if (!wd) {
+		pr_err("obtain window failed\n");
+		return -EBUSY;
+	}
+
+	if (!wd->buffer) {
+		pr_err("no window buffer to save data\n");
+		err = -EINVAL;
+		goto free_obtain;
+	}
+
+	io_param = kzalloc(sizeof(struct cis_message), GFP_KERNEL);
+	if (!io_param) {
+		err = -ENOMEM;
+		goto free_obtain;
+	}
+
+	window = (struct uvb_window *)memremap(wd->address, sizeof(struct uvb_window), MEMREMAP_WC);
+	if (!window) {
+		pr_err("memremap uvb window failed\n");
+		err = -ENOMEM;
+		goto free_io_param;
+	}
+
+	err = uvb_fill_window(wd, window, io_param, para);
+	if (err) {
+		pr_err("fill uvb window failed\n");
+		goto unmap_window;
+	}
+
+	err = uvb_poll_window_call(window, para->message_id);
+	if (err) {
+		pr_err("call by uvb failed\n");
+		goto free_window;
+	}
+
+	err = uvb_get_output_data(window, io_param, para->output, para->output_size);
+	if (err)
+		pr_err("uvb get output data failed\n");
+
+free_window:
+	uvb_free_window(window);
+unmap_window:
+	memunmap(window);
+free_io_param:
+	free_io_param_with_buffer(io_param);
+free_obtain:
+	uvb_free_wd_obtain(wd_obtain);
+	pr_info("finish cis call by uvb\n");
+
+	return err;
+}
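+
+/*
+ * Summary of the window protocol shared by cis_call_uvb() above and
+ * cis_call_uvb_sync() below (informal, condensed from the helpers in
+ * this file): occupy a free window (claimed once the obtain slot holds
+ * our sender id), fill it (ids and data copied into the window buffer),
+ * poll until the callee flips message_id to ~message_id and publishes
+ * returned_status, read the checksum-verified output, then release the
+ * window and the obtain mapping.
+ */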
+
+int cis_call_uvb_sync(u8 index, struct udfi_para *para)
+{
+	int err;
+	struct uvb_window *window = NULL;
+	struct uvb_window_description *wd = NULL;
+	u64 *wd_obtain = NULL;
+
+	memset(io_param_sync, 0, sizeof(struct cis_message));
+
+	if (!g_uvb_info) {
+		pr_err("sync call uvb unsupported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (index >= g_uvb_info->uvb_count) {
+		pr_err("sync call uvb index exceeds uvb count\n");
+		return -EOVERFLOW;
+	}
+
+	wd = uvb_occupy_window(g_uvb_info->uvbs[index], para->sender_id, wd_obtain);
+	if (!wd) {
+		pr_err("sync call obtain window failed\n");
+		return -EBUSY;
+	}
+
+	if (!wd->buffer) {
+		pr_err("sync call no window buffer to save data\n");
+		err = -EINVAL;
+		goto free_obtain;
+	}
+
+	window = (struct uvb_window *)memremap(wd->address, sizeof(struct uvb_window), MEMREMAP_WC);
+	if (!window) {
+		pr_err("sync call memremap window failed\n");
+		err = -ENOMEM;
+		goto free_obtain;
+	}
+
+	err = uvb_fill_window(wd, window, io_param_sync, para);
+	if (err) {
+		pr_err("sync call fill uvb window failed\n");
+		goto unmap_window;
+	}
+
+	err = uvb_poll_window_call_sync(window, para->message_id);
+	if (err) {
+		pr_err("sync call by uvb failed\n");
+		goto free_window;
+	}
+
+	err = uvb_get_output_data(window, io_param_sync, para->output, para->output_size);
+	if (err)
+		pr_err("sync call uvb get output data failed\n");
+
+free_window:
+	uvb_free_window(window);
+	if (io_param_sync->input)
+		memunmap(io_param_sync->input);
+unmap_window:
+	memunmap(window);
+free_obtain:
+	uvb_free_wd_obtain(wd_obtain);
+	pr_info("finish cis sync call by uvb\n");
+
+	return err;
+}
+
+int cis_call_remote(u32 call_id, u32 sender_id, u32 receiver_id,
+		    struct cis_message *msg,
+		    bool is_sync)
+{
+	u32 forwarder_id;
+	u32 exact_receiver_id;
+	u8 usage;
+	u8 index;
+	int res;
+	struct udfi_para para = { 0 };
+
+	res = get_cis_group_info(call_id, receiver_id,
+				 &usage, &index, &exact_receiver_id, &forwarder_id);
+	if (res) {
+		pr_err("can't get group info, call id=%08x, receiver id=%08x\n",
+		       call_id, receiver_id);
+		return -EOPNOTSUPP;
+	}
+
+	para.input = msg->input;
+	para.input_size = msg->input_size;
+	para.output = msg->output;
+	para.output_size = msg->p_output_size;
+	para.message_id = call_id;
+	para.receiver_id = exact_receiver_id;
+	para.sender_id = sender_id;
+	para.forwarder_id = forwarder_id;
+
+	if (usage != CIS_USAGE_UVB) {
+		pr_err("method not supported, call id=%08x, receiver id=%08x, usage=%d\n",
+		       call_id, receiver_id, usage);
+		return -EOPNOTSUPP;
+	}
+
+	if (is_sync)
+		return cis_call_uvb_sync(index, &para);
+
+	return cis_call_uvb(index, &para);
+}
+
+/**
+ * cis_call_by_uvb - Trigger a cis call with the given arguments.
+ *
+ * @call_id: call id that identifies which cis call will be triggered.
+ * @sender_id: user id of sender.
+ * @receiver_id: user id of receiver.
+ * @msg: the data that the user needs to transmit.
+ * @is_sync: whether to use a synchronous interface.
+ *
+ * Search for cia (call id attribute) in cis info with the given call id and
+ * receiver id. The `usage` property of the cia determines which method is
+ * used (uvb/arch call).
+ * Return 0 if the cis call succeeds, -EOPNOTSUPP if the communication
+ * method is not supported, or a cis error code otherwise.
+ */
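+/*
+ * Illustrative caller sketch (hypothetical: EXAMPLE_CALL_ID and the
+ * req/reply objects are placeholders, not defined by this driver):
+ *
+ *	u32 reply_size = sizeof(reply);
+ *	struct cis_message msg = {
+ *		.input = &req,
+ *		.input_size = sizeof(req),
+ *		.output = &reply,
+ *		.p_output_size = &reply_size,
+ *	};
+ *	err = cis_call_by_uvb(EXAMPLE_CALL_ID, UBIOS_MY_USER_ID,
+ *			      receiver_id, &msg, false);
+ */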
+int cis_call_by_uvb(u32 call_id, u32 sender_id, u32 receiver_id,
+		    struct cis_message *msg, bool is_sync)
+{
+	int ret;
+	msg_handler func;
+
+	pr_debug("cis call: call id %08x, sender id %08x, receiver id %08x\n",
+		 call_id, sender_id, receiver_id);
+	if (cis_call_for_me(receiver_id) || cis_call_for_local(receiver_id)) {
+		func = search_local_cis_func(call_id, receiver_id);
+		if (func) {
+			ret = func(msg);
+			if (ret) {
+				pr_err("cis call execute registered cis func failed\n");
+				return ret;
+			}
+			pr_info("cis call execute registered cis func success\n");
+			return 0;
+		}
+		pr_err("no cis func found for callid=%08x, receiver_id=%08x\n",
+		       call_id, receiver_id);
+		return -EOPNOTSUPP;
+	}
+
+	return cis_call_remote(call_id, sender_id, receiver_id, msg, is_sync);
+}
+EXPORT_SYMBOL(cis_call_by_uvb);
+
+/*
+Register a Call ID Service
+@call_id - UBIOS Interface ID
+@receiver_id - UBIOS User ID who owns this CIS
+@func - Callback function of Call ID
+*/
+int register_local_cis_func(u32 call_id, u32 receiver_id, msg_handler func)
+{
+	struct cis_func_node *p;
+	unsigned long flags;
+
+	if (UBIOS_GET_MESSAGE_FLAG(call_id) != UBIOS_CALL_ID_FLAG) {
+		pr_err("register: call id is not a uvb call\n");
+		return -EINVAL;
+	}
+	if (!func) {
+		pr_err("register func is NULL\n");
+		return -EINVAL;
+	}
+
+	/* check if this Call ID already has a function */
+	if (search_local_cis_func_node(call_id, receiver_id)) {
+		pr_err("cis register: call_id[%08x], receiver_id[%08x], func already registered\n",
+		       call_id, receiver_id);
+		return -EINVAL;
+	}
+
+	p = kcalloc(1, sizeof(struct cis_func_node), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	p->call_id = call_id;
+	p->receiver_id = receiver_id;
+	p->func = func;
+
+	spin_lock_irqsave(&cis_register_lock, flags);
+	list_add_tail_rcu(&p->link, &g_local_cis_list);
+	spin_unlock_irqrestore(&cis_register_lock, flags);
+	pr_info("register cis func success\n");
+
+	return 0;
+}
+EXPORT_SYMBOL(register_local_cis_func);
+
+/*
+Unregister a Call ID Service
+@call_id - UBIOS Interface ID
+@receiver_id - UBIOS User ID who owns this CIS
+*/
+int unregister_local_cis_func(u32 call_id, u32 receiver_id)
+{
+	struct cis_func_node *p;
+	unsigned long flags;
+
+	if (UBIOS_GET_MESSAGE_FLAG(call_id) != UBIOS_CALL_ID_FLAG) {
+		pr_err("unregister: call id is not a uvb call\n");
+		return -EINVAL;
+	}
+
+	p = search_local_cis_func_node(call_id, receiver_id);
+	if (!p) {
+		pr_err("cis unregister: call_id[%08x], receiver_id[%08x] func node not found\n",
+		       call_id, receiver_id);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&cis_register_lock, flags);
+	list_del_rcu(&p->link);
+	spin_unlock_irqrestore(&cis_register_lock, flags);
+	synchronize_rcu();
+
+	kfree(p);
+	pr_info("unregister cis func success\n");
+
+	return 0;
+}
+EXPORT_SYMBOL(unregister_local_cis_func);
+
diff --git a/drivers/ubios_uvb/cis/cis_info_process.h b/drivers/firmware/ubios_uvb/cis/cis_info_process.h
similarity index 64%
rename from drivers/ubios_uvb/cis/cis_info_process.h
rename to drivers/firmware/ubios_uvb/cis/cis_info_process.h
index 2ddf071699f5..ad2ed2467fda 100644
--- a/drivers/ubios_uvb/cis/cis_info_process.h
+++ b/drivers/firmware/ubios_uvb/cis/cis_info_process.h
@@ -10,12 +10,18 @@
 #ifndef CIS_INFO_PROCESS_H
 #define CIS_INFO_PROCESS_H
 
-#include
 #include "cis_uvb_interface.h"
 
+#define CIS_USAGE_UVB 2
+#define MAX_UVB_LOCK_IN_BITS 8
+#define UVB_POLL_TIME_INTERVAL (100) /* 100us */
+#define UVB_POLL_TIMEOUT (1000) /* 1000ms */
+#define UVB_TIMEOUT_WINDOW_OBTAIN (10000) /* 10000ms */
+#define UVB_POLL_TIMEOUT_TIMES (10000) /* 10000 times */
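+/*
+ * Derived bound from the constants above: the sync poll path retries
+ * UVB_POLL_TIMEOUT_TIMES times with a UVB_POLL_TIME_INTERVAL delay
+ * between tries, i.e. it waits at least 10000 * 100us = 1s before
+ * giving up, matching the async UVB_POLL_TIMEOUT of 1000ms.
+ */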
+
 extern struct cis_message *io_param_sync;
 extern struct list_head g_local_cis_list;
-extern spinlock_t cis_register_lock;
+extern DECLARE_HASHTABLE(uvb_lock_table, MAX_UVB_LOCK_IN_BITS);
 
 struct udfi_para {
 	u32 message_id;
@@ -35,17 +41,15 @@ struct cis_func_node {
 	msg_handler func;
 };
 
-int init_cis_table(void);
-int init_global_vars(void);
-void free_global_vars(void);
+struct uvb_window_lock {
+	atomic_t lock;
+	u64 window_address;
+	struct hlist_node node;
+};
 
-int get_cis_group_info(u32 call_id, u32 receiver_id,
-		       u8 *usage, u8 *index,
-		       u32 *exact_receiver_id, u32 *forwarder_id);
 int cis_call_remote(u32 call_id, u32 sender_id, u32 receiver_id,
 		    struct cis_message *msg,
 		    bool is_sync);
-msg_handler search_my_cis_func(u32 call_id);
 msg_handler search_local_cis_func(u32 call_id, u32 receiver_id);
 
 static inline u32 ubios_get_user_type(u32 user_id)
diff --git a/drivers/firmware/ubios_uvb/cis/uvb_info_process.c b/drivers/firmware/ubios_uvb/cis/uvb_info_process.c
new file mode 100644
index 000000000000..24f95982ad72
--- /dev/null
+++ b/drivers/firmware/ubios_uvb/cis/uvb_info_process.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
+ * Description: UVB info processing module, handles init and window polling.
+ * Author: zhangrui
+ * Create: 2025-04-18
+ */
+#define pr_fmt(fmt) "[UVB]: " fmt
+
+#include
+#include
+#include
+#include
+#include "cis_info_process.h"
+#include "uvb_info_process.h"
+
+/*
+ * Calculate a checksum in 4-byte units. If size is not 4-byte aligned,
+ * the trailing bytes are padded with zeros (masked in below).
+ */
+u32 checksum32(const void *data, u32 size)
+{
+	u64 i;
+	u64 sum = 0;
+	u32 remainder = size % sizeof(u32);
+	u32 *p = (u32 *)data;
+	u32 restsize = size - remainder;
+
+	if (!data)
+		return (u32)-1;
+
+	for (i = 0; i < restsize; i += sizeof(u32)) {
+		sum += *p;
+		p++;
+	}
+
+	switch (remainder) {
+	case 1:
+		sum += (*p) & 0x000000FF;
+		break;
+	case 2:
+		sum += (*p) & 0x0000FFFF;
+		break;
+	case 3:
+		sum += (*p) & 0x00FFFFFF;
+		break;
+	default:
+		break;
+	}
+
+	return (u32)(sum);
+}
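+
+/*
+ * Worked example (little-endian, illustration only): for the six bytes
+ * { 01 02 03 04 05 06 }, the first word contributes 0x04030201 and the
+ * two-byte remainder contributes 0x0605, so checksum32() returns
+ * 0x04030201 + 0x0605 = 0x04030806.
+ */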
+
+static bool is_address_exceed(void *buffer, u32 buffer_size, void *input_address,
+			      u32 input_size, void *output_address, u32 *output_size)
+{
+	void *end_of_buffer = buffer + buffer_size;
+
+	if (input_address) {
+		if ((input_address < buffer) || (input_address + input_size >= end_of_buffer)) {
+			pr_err("input address exceeds buffer\n");
+			return true;
+		}
+	}
+
+	if (output_address && output_size) {
+		if ((output_address < buffer + input_size)
+		    || (output_address + *output_size >= end_of_buffer)) {
+			pr_err("output address exceeds buffer\n");
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* mappings are returned via virt_input/virt_output so the caller can memunmap them */
+static int uvb_get_input_data(struct uvb_window *window, void *buffer, u32 buffer_size,
+			      struct cis_message *msg, void **virt_input, void **virt_output)
+{
+	msg->input_size = window->input_data_size;
+	if (window->output_data_size == UVB_OUTPUT_SIZE_NULL)
+		msg->p_output_size = NULL;
+	else
+		msg->p_output_size = &window->output_data_size;
+
+	if (!buffer) {
+		msg->input = (void *)window->input_data_address;
+		msg->output = (void *)window->output_data_address;
+	} else {
+		msg->input = (window->input_data_address == 0 ? NULL : buffer);
+		msg->output = (window->output_data_address == 0 ? NULL :
+			       ((u8 *)buffer + ALIGN(msg->input_size, sizeof(u64))));
+		if (is_address_exceed(buffer, buffer_size, msg->input, msg->input_size,
+				      msg->output, msg->p_output_size)) {
+			pr_err("window data address out of range\n");
+			return -EOVERFLOW;
+		}
+	}
+	if (msg->input && msg->input_size) {
+		*virt_input = memremap((u64)msg->input, msg->input_size, MEMREMAP_WC);
+		if (!*virt_input) {
+			pr_err("memremap for input failed\n");
+			return -ENOMEM;
+		}
+		msg->input = *virt_input;
+	}
+	if (msg->output && msg->p_output_size && *msg->p_output_size) {
+		*virt_output = memremap((u64)msg->output, *msg->p_output_size, MEMREMAP_WC);
+		if (!*virt_output) {
+			pr_err("memremap for output failed\n");
+			return -ENOMEM;
+		}
+		msg->output = *virt_output;
+	}
+	if (msg->input_size) {
+		if (window->input_data_checksum != checksum32(msg->input, msg->input_size)) {
+			pr_err("input data checksum error\n");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static void uvb_return_status(struct uvb_window *window, int status)
+{
+	window->returned_status = (u32)status;
+	window->message_id = ~window->message_id;
+}
+
+bool search_local_receiver_id(u32 receiver_id)
+{
+	bool found = false;
+	struct cis_func_node *cis_node;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cis_node, &g_local_cis_list, link) {
+		if (cis_node->receiver_id == receiver_id) {
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+static void uvb_polling_window(struct uvb_window_description *wd)
+{
+	int err = 0;
+	bool found;
+	u32 receiver_id, message_id;
+	struct uvb_window *window = NULL;
+	struct cis_message msg = { 0 };
+	msg_handler func;
+	void *virt_addr_input = NULL;
+	void *virt_addr_output = NULL;
+
+	window = (struct uvb_window *)memremap(wd->address,
+					       sizeof(struct uvb_window), MEMREMAP_WC);
+	if (!window) {
+		pr_err("polling window failed to map window addr\n");
+		return;
+	}
+	receiver_id = window->receiver_id;
+	message_id = window->message_id;
+
+	if (window->receiver_id) {
+		pr_debug("UVB window address: %llx\n", wd->address);
+		pr_debug("Version = %08x\n", window->version);
+		pr_debug("Message ID = %08x\n", window->message_id);
+		pr_debug("Sender ID = %08x\n", window->sender_id);
+		pr_debug("Receiver ID = %08x\n", window->receiver_id);
+		pr_debug("Forwarder ID = %08x\n", window->forwarder_id);
+		pr_debug("Input Data Address = %llx\n", window->input_data_address);
+		pr_debug("Input Data Size = %08x\n", window->input_data_size);
+		pr_debug("Output Data Address = %llx\n", window->output_data_address);
+		pr_debug("Output Data Size = %08x\n", window->output_data_size);
+		pr_debug("Returned Status = %08x\n", window->returned_status);
+		pr_debug("Buffer = %llx, size = %08x\n", wd->buffer, wd->size);
+	}
+
+	found = search_local_receiver_id(receiver_id);
+	if (found) {
+		pr_debug("polling window start for callid=%08x, receiverid=%08x\n",
+			 message_id, receiver_id);
+		window->receiver_id = 0;
+		/* get input data and check */
+		err = uvb_get_input_data(window, (void *)wd->buffer, wd->size,
+					 &msg, &virt_addr_input, &virt_addr_output);
+		if (err) {
+			uvb_return_status(window, err);
+			goto free_resources;
+		}
+		func = search_local_cis_func(message_id, receiver_id);
+		if (func) {
+			err = func(&msg);
+			if (!err && msg.output && *msg.p_output_size)
+				window->output_data_checksum =
+					checksum32(msg.output, *msg.p_output_size);
+			if (!err)
+				pr_info("polling window executed local cis func successfully\n");
+		} else {
+			pr_err("polling window: no local cis func found for callid=%08x, receiverid=%08x\n",
+			       message_id, receiver_id);
+			err = -EOPNOTSUPP;
+		}
+		uvb_return_status(window, err);
+		goto free_resources;
+	/* need uvb to forward */
+	} else if (window->forwarder_id == UBIOS_MY_USER_ID) {
+		pr_info("cis call forward start\n");
+		window->forwarder_id = 0;
+
+		err = uvb_get_input_data(window, (void *)wd->buffer, wd->size,
+					 &msg, &virt_addr_input, &virt_addr_output);
+		if (err) {
+			uvb_return_status(window, err);
+			goto free_resources;
+		}
+		err = cis_call_remote(message_id, UBIOS_MY_USER_ID, receiver_id, &msg, false);
+		if (!err && msg.output && *msg.p_output_size)
+			window->output_data_checksum =
+				checksum32(msg.output, *msg.p_output_size);
+		pr_info("cis call forward end\n");
+		uvb_return_status(window, err);
+		goto free_resources;
+	}
+
+free_resources:
+	if (virt_addr_input)
+		memunmap(virt_addr_input);
+
+	if (virt_addr_output)
+		memunmap(virt_addr_output);
+
+	if (window)
+		memunmap(window);
+}
+
+static int uvb_polling_window_sync(struct uvb_window_description *wd)
+{
+	int err = -EAGAIN;
+	bool found;
+	struct uvb_window *window = NULL;
+	u32 receiver_id, message_id;
+	struct cis_message msg = { 0 };
+	msg_handler func;
+	void *virt_addr_input = NULL;
+	void *virt_addr_output = NULL;
+
+	window = (struct uvb_window *)memremap(wd->address,
+					       sizeof(struct uvb_window), MEMREMAP_WC);
+	if (!window) {
+		pr_err("polling window sync failed to map window addr\n");
+		return -ENOMEM;
+	}
+
+	receiver_id = window->receiver_id;
+	message_id = window->message_id;
+
+	found = search_local_receiver_id(receiver_id);
+	if (found) {
+		pr_debug("polling window sync start for callid=%08x, receiverid=%08x\n",
+			 message_id, receiver_id);
+		window->receiver_id = 0;
+		err = uvb_get_input_data(window, (void *)wd->buffer, wd->size,
+					 &msg, &virt_addr_input, &virt_addr_output);
+		if (err) {
+			err = -EINVAL;
+			uvb_return_status(window, err);
+			goto free_resources;
+		}
+		func = search_local_cis_func(message_id, receiver_id);
+		if (func) {
+			err = func(&msg);
+			if (!err && msg.output && *msg.p_output_size)
+				window->output_data_checksum =
+					checksum32(msg.output, *msg.p_output_size);
+			if (err)
+				err = -EPERM;
+			else
+				pr_info("polling window sync executed local cis func successfully\n");
+		} else {
+			pr_err("polling window sync: no cis func found for callid=%08x, receiverid=%08x\n",
+			       message_id, receiver_id);
+			err = -EOPNOTSUPP;
+		}
+		uvb_return_status(window, err);
+		goto free_resources;
+	}
+
+free_resources:
+	if (virt_addr_input)
+		memunmap(virt_addr_input);
+
+	if (virt_addr_output)
+		memunmap(virt_addr_output);
+
+	if (window)
+		memunmap(window);
+
+	return err;
+}
+
+int uvb_poll_window(void *data)
+{
+	int i;
+	int j;
+	struct uvb *uvb;
+
+	while (!kthread_should_stop()) {
+		for (i = 0; i < g_uvb_info->uvb_count; i++) {
+			uvb = g_uvb_info->uvbs[i];
+			if (!uvb)
+				continue;
+
+			if (uvb->window_count == 0)
+				continue;
+
+			for (j = 0; j < uvb->window_count; j++)
+				uvb_polling_window(&uvb->wd[j]);
+		}
+		msleep(1);
+	}
+
+	return 0;
+}
+
+int uvb_polling_sync(void *data)
+{
+	int i;
+	int j;
+	int index;
+	int err;
+	struct uvb *uvb;
+
+	for (index = 0; index < UVB_POLL_TIMEOUT_TIMES; index++) {
+		for (i = 0; i < g_uvb_info->uvb_count; i++) {
+			uvb = g_uvb_info->uvbs[i];
+			if (!uvb)
+				continue;
+
+			if (uvb->window_count == 0)
+				continue;
+
+			for (j = 0; j < uvb->window_count; j++) {
+				err = uvb_polling_window_sync(&uvb->wd[j]);
+				if (err == -EAGAIN)
+					continue;
+				return err;
+			}
+		}
+		udelay(UVB_POLL_TIME_INTERVAL);
+	}
+
+	pr_err("timeout occurred after 1s\n");
+
+	return -ETIMEDOUT;
+}
+EXPORT_SYMBOL(uvb_polling_sync);
+
diff --git a/drivers/ubios_uvb/cis/io_param.h
b/drivers/firmware/ubios_uvb/cis/uvb_info_process.h similarity index 39% rename from drivers/ubios_uvb/cis/io_param.h rename to drivers/firmware/ubios_uvb/cis/uvb_info_process.h index 9c83dc242fad..7f7f0e7362db 100644 --- a/drivers/ubios_uvb/cis/io_param.h +++ b/drivers/firmware/ubios_uvb/cis/uvb_info_process.h @@ -1,17 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - * Description: io param header + * Description: uvb info process header * Author: zhangrui * Create: 2025-04-18 */ -#ifndef UBIOS_IO_PARAM_H -#define UBIOS_IO_PARAM_H - -#include - -void ubios_uvb_free_io_param(struct cis_message *param, u8 free_flag); -void ubios_prepare_output_data(struct cis_message *io_param, void *output, u32 *output_size); +#ifndef UVB_INFO_PROCESS_H +#define UVB_INFO_PROCESS_H +int uvb_poll_window(void *data); +u32 checksum32(const void *data, u32 size); #endif diff --git a/drivers/ubios_uvb/include/cis_uvb_interface.h b/drivers/firmware/ubios_uvb/include/cis_uvb_interface.h similarity index 96% rename from drivers/ubios_uvb/include/cis_uvb_interface.h rename to drivers/firmware/ubios_uvb/include/cis_uvb_interface.h index c476537eb51e..bc9d5a858cce 100644 --- a/drivers/ubios_uvb/include/cis_uvb_interface.h +++ b/drivers/firmware/ubios_uvb/include/cis_uvb_interface.h @@ -8,11 +8,8 @@ #ifndef CIS_UVB_INTERFACE_H #define CIS_UVB_INTERFACE_H -#include -#include "odf_interface.h" +#include -#define LOG_PRE "[UVB]:" -#define ERR_PRE "[UVB]ERR:" /** * struct cis_group - call id service group * @owner_user_id: user id that indicates which component owns the cia[] array diff --git a/drivers/ubios_uvb/odf/odf_data.c b/drivers/firmware/ubios_uvb/odf/odf_data.c similarity index 94% rename from drivers/ubios_uvb/odf/odf_data.c rename to drivers/firmware/ubios_uvb/odf/odf_data.c index 3dee113b8e29..af9383759121 100644 --- a/drivers/ubios_uvb/odf/odf_data.c +++ b/drivers/firmware/ubios_uvb/odf/odf_data.c @@ -5,9 +5,12 @@ * Author: zhangrui * Create: 2025-04-18 */ +#define pr_fmt(fmt) "[UVB]: " fmt + #include -#include "include/libodf.h" -#include "include/libodf_handle.h" +#include "odf_interface.h" +#include "odf_handle.h" +#include "cis_uvb_interface.h" /** @brief Search and match one value name, return the pointer of value structrue if matched. 
@@ -92,7 +95,7 @@ static int odf_get_vs_from_file(u8 *file, char *path, struct ubios_od_value_stru
 	bool is_got_vs = false;
 
 	if (!is_od_file_valid(file)) {
-		pr_err(ERR_PRE "odf: file[%llx] invalid\n", (u64)file);
+		pr_err("odf: file[%llx] invalid\n", (u64)file);
 		return -EINVAL;
 	}
 
@@ -103,14 +106,14 @@
 	while (odf_separate_name(&path, name, UBIOS_OD_NAME_LEN_MAX, &index) == 0) {
 		status = odf_get_vs_by_name(vs->data, vs->data + vs->data_length, name, vs);
 		if (status) {
-			pr_err(ERR_PRE "odf: can not find name[%s]'s value\n", name);
+			pr_err("odf: cannot find name[%s]'s value\n", name);
 			return status;
 		}
 		is_got_vs = true;
 		if (index != UBIOS_OD_INVALID_INDEX) {
 			status = odf_change_vs_by_index(vs, index);
 			if (status) {
-				pr_err(ERR_PRE "odf: get value by index failed, name[%s], type[%#x], index[%#x]\n",
+				pr_err("odf: get value by index failed, name[%s], type[%#x], index[%#x]\n",
 				       name, vs->type, index);
 				return status;
 			}
@@ -119,7 +122,7 @@
 	if ((is_got_vs) && !path)
 		return 0;
 
-	pr_err(ERR_PRE "odf: failed, left path[%s]\n", path);
+	pr_err("odf: failed, left path[%s]\n", path);
 	return -EOPNOTSUPP;
 }
 
@@ -137,13 +140,13 @@
 	status = odf_separate_name(&path, name, UBIOS_OD_NAME_LEN_MAX, NULL);
 	if (status) {
-		pr_err(ERR_PRE "odf: get od file name failed, %d\n", status);
+		pr_err("odf: get od file name failed, %d\n", status);
 		return status;
 	}
 
 	od_file = odf_get_od_file(root, name);
 	if (!od_file) {
-		pr_err(ERR_PRE "odf: can not find od file[%s]\n", name);
+		pr_err("odf: cannot find od file[%s]\n", name);
 		return -ENOENT;
 	}
 
@@ -159,7 +162,7 @@ static bool is_root_and_path_valid(struct ubios_od_root *root, char *path)
 		return false;
 
 	if (!path) {
-		pr_err(ERR_PRE "odf: path is NULL\n");
+		pr_err("odf: path is NULL\n");
 		return false;
 	}
 
@@ -213,7 +216,7 @@ int odf_vs_to_table(struct ubios_od_value_struct *vs, struct ubios_od_table_info
 			table_info->length_per_row += sizeof(u64);
 			break;
 		default:
-			pr_err(ERR_PRE "odf: get table[%s] info, invalid type[%d] of column[%llu]\n",
+			pr_err("odf: get table[%s] info, invalid type[%d] of column[%llu]\n",
 			       table_info->table_name, type, i);
 			return -EOPNOTSUPP;
 		}
@@ -273,7 +276,7 @@ int odf_get_offset_in_table(const struct ubios_od_table_info *table,
 			temp_offset += sizeof(u64);
 			break;
 		default:
-			pr_err(ERR_PRE "odf: get table info, invalid type[%d] of column[%llu]\n",
+			pr_err("odf: get table info, invalid type[%d] of column[%llu]\n",
 			       data_type, i);
 			return -EOPNOTSUPP;
 		}
@@ -346,7 +349,7 @@ int odf_get_data_from_table(const struct ubios_od_table_info *table,
 		*(s64 *)value = (s64)odf_read64(p);
 		break;
 	default:
-		pr_err(ERR_PRE "odf: get table data failed, invalid type[%#x]\n", type);
+		pr_err("odf: get table data failed, invalid type[%#x]\n", type);
 		return -EOPNOTSUPP;
 	}
 
@@ -392,7 +395,7 @@ int odf_get_list_from_table(u8 *table, char *path, struct ubios_od_list_info *li
 		return status;
 
 	if ((vs.type & UBIOS_OD_TYPE_LIST) != UBIOS_OD_TYPE_LIST) {
-		pr_err(ERR_PRE "odf:the type[%#x] is not a list\n", vs.type);
+		pr_err("odf: the type[%#x] is not a list\n", vs.type);
 		return -EFAULT;
 	}
 
@@ -444,7 +447,7 @@ int odf_get_list(struct ubios_od_root *root, char *path, struct ubios_od_list_in
 		return status;
 
 	if ((vs.type & UBIOS_OD_TYPE_LIST) != UBIOS_OD_TYPE_LIST) {
-		pr_err(ERR_PRE "the type[%#x] is not a list\n", vs.type);
+		pr_err("the type[%#x] is not a list\n", vs.type);
 		return -EFAULT;
 	}
 
@@ -532,7 +535,7 @@ int odf_get_data_from_list(const struct ubios_od_list_info *list,
 		vs->data_length = odf_read32(p);
 		break;
 	default:
-		pr_err(ERR_PRE "odf: invalid type[%#x], not support\n", vs->type);
+		pr_err("odf: invalid type[%#x], not supported\n", vs->type);
 		return -EOPNOTSUPP;
 	}
 
@@ -578,7 +581,7 @@ int odf_next_in_list(const struct ubios_od_list_info *list, struct ubios_od_valu
 		vs->data = p + sizeof(u32);
 		break;
 	default:
-		pr_err(ERR_PRE "odf: invalid type[%#x], not support\n", vs->type);
+		pr_err("odf: invalid type[%#x], not supported\n", vs->type);
 		return -EOPNOTSUPP;
 	}
 	if (vs->data >= list->end)
@@ -715,7 +718,7 @@ int odf_get_list_from_struct(const struct ubios_od_value_struct *vs,
 		return status;
 
 	if ((temp_vs.type & UBIOS_OD_TYPE_LIST) != UBIOS_OD_TYPE_LIST) {
-		pr_err(ERR_PRE "the type[%#x] is not a list\n", temp_vs.type);
+		pr_err("the type[%#x] is not a list\n", temp_vs.type);
 		return -EFAULT;
 	}
 
@@ -723,3 +726,4 @@
 
 	return 0;
 }
+
diff --git a/drivers/ubios_uvb/odf/odf_get_fdt.c b/drivers/firmware/ubios_uvb/odf/odf_get_fdt.c
similarity index 100%
rename from drivers/ubios_uvb/odf/odf_get_fdt.c
rename to drivers/firmware/ubios_uvb/odf/odf_get_fdt.c
diff --git a/drivers/ubios_uvb/odf/include/libodf_handle.h b/drivers/firmware/ubios_uvb/odf/odf_handle.h
similarity index 99%
rename from drivers/ubios_uvb/odf/include/libodf_handle.h
rename to drivers/firmware/ubios_uvb/odf/odf_handle.h
index e50d26d3afd3..136f4c98364e 100644
--- a/drivers/ubios_uvb/odf/include/libodf_handle.h
+++ b/drivers/firmware/ubios_uvb/odf/odf_handle.h
@@ -5,8 +5,8 @@
  * Author: zhangrui
  * Create: 2025-04-18
  */
-#ifndef LIBODF_HANDLE_H
-#define LIBODF_HANDLE_H
+#ifndef ODF_HANDLE_H
+#define ODF_HANDLE_H
 #include
 
 extern struct ubios_od_root *od_root;
diff --git a/drivers/ubios_uvb/odf/odf_helper.c b/drivers/firmware/ubios_uvb/odf/odf_helper.c
similarity index 76%
rename from drivers/ubios_uvb/odf/odf_helper.c
rename to drivers/firmware/ubios_uvb/odf/odf_helper.c
index 02e9a6a1895f..c170d80c5018 100644
--- a/drivers/ubios_uvb/odf/odf_helper.c
+++ b/drivers/firmware/ubios_uvb/odf/odf_helper.c
@@ -5,10 +5,13 @@
  * Author: zhangrui
  * Create: 2025-04-18
  */
+#define pr_fmt(fmt) "[UVB]: " fmt
+
 #include
 #include
 #include
-#include "include/libodf.h"
+#include "odf_interface.h"
+#include "cis_uvb_interface.h"
 
 #define UBIOS_OD_INDEX_STRING_MAX 7
 #define DECIMAL 10
@@ -125,7 +128,7 @@ int odf_separate_name(char **path, char *name, u64 max_len, u16 *index)
 		return -EOPNOTSUPP;
 
 	c = *path;
-	pr_debug(LOG_PRE "odf separate name: path[%s]\n", *path);
+	pr_debug("odf separate name: path[%s]\n", *path);
 
 	/* if the first character is a separator, skip it */
 	if (*c == UBIOS_OD_PATH_SEPARATOR)
@@ -141,7 +144,7 @@
 				if (ret)
 					*index = UBIOS_OD_INVALID_INDEX;
 			}
-			pr_debug(LOG_PRE "odf separate name: got name[%s]\n", name);
+			pr_debug("odf separate name: got name[%s]\n", name);
 			break;
 		} else if (*c == '[') {
 			is_index = true;
@@ -215,3 +218,69 @@ void odf_get_vs_by_pointer(u8 *data, struct ubios_od_value_struct *vs)
 		break;
 	}
 }
+
+bool is_od_root_valid(struct ubios_od_root *root)
+{
+	if (!root) {
+		pr_err("odf: root is NULL\n");
+		return false;
+	}
+
+	if (!odf_is_checksum_ok(&(root->header))) {
+		pr_err("odf: root checksum error.\n");
+		return false;
+	}
+
+	if (strcmp(root->header.name, UBIOS_OD_ROOT_NAME)) {
+		pr_err("odf: root name[%s] mismatch\n",
root->header.name); + return false; + } + + return true; +} + +bool is_od_file_valid(u8 *file) +{ + struct ubios_od_header *header = (struct ubios_od_header *)file; + + if (!header) { + pr_err("odf: file is NULL\n"); + return false; + } + + if (!odf_is_checksum_ok(header)) { + pr_err("odf: file checksum error.\n"); + return false; + } + + return true; +} + +/** +@brief Search all pointer in od root, return the specific od file matched the input name. +@param[in] root start of od root +@param[in] name name of od +@return +@retval = NULL, not found. +@retval != NULL, found. +*/ +u8 *odf_get_od_file(struct ubios_od_root *root, char *name) +{ + u64 i; + + if (!is_od_root_valid(root)) + return NULL; + + if (!name) + return NULL; + + for (i = 0; i < root->count; i++) { + if (!root->odfs[i]) + continue; + + if (strcmp(name, (char *)(u64)root->odfs[i]) == 0) + return (u8 *)(u64)root->odfs[i]; + } + + return NULL; +} diff --git a/drivers/ubios_uvb/include/odf_interface.h b/drivers/firmware/ubios_uvb/odf/odf_interface.h similarity index 46% rename from drivers/ubios_uvb/include/odf_interface.h rename to drivers/firmware/ubios_uvb/odf/odf_interface.h index 41b1fa4450d2..85d230934cdf 100644 --- a/drivers/ubios_uvb/include/odf_interface.h +++ b/drivers/firmware/ubios_uvb/odf/odf_interface.h @@ -9,13 +9,57 @@ #define ODF_INTERFACE_H #include +#define UBIOS_OD_NAME_LEN_MAX 16 +#define UBIOS_OD_VERSION 1 + +#define UBIOS_OD_TYPE_U8 0x1 +#define UBIOS_OD_TYPE_U16 0x2 +#define UBIOS_OD_TYPE_U32 0x3 +#define UBIOS_OD_TYPE_U64 0x4 +#define UBIOS_OD_TYPE_S8 0x5 +#define UBIOS_OD_TYPE_S16 0x6 +#define UBIOS_OD_TYPE_S32 0x7 +#define UBIOS_OD_TYPE_S64 0x8 +#define UBIOS_OD_TYPE_BOOL 0x10 +#define UBIOS_OD_TYPE_CHAR 0x20 +#define UBIOS_OD_TYPE_STRING 0x21 +#define UBIOS_OD_TYPE_STRUCT 0x30 +#define UBIOS_OD_TYPE_TABLE 0x40 +#define UBIOS_OD_TYPE_FILE 0x50 +#define UBIOS_OD_TYPE_LIST 0x80 + +#define UBIOS_OD_ROOT_NAME "root_table" +#define UBIOS_OD_INVALID_INDEX 0xFFFF +#define UBIOS_OD_PATH_SEPARATOR '/' + +#define ODF_FILE_NAME_CALL_ID_SERVICE "call_id_service" +#define ODF_NAME_CIS_GROUP "group" +#define ODF_NAME_CIS_UB "ub" +#define ODF_NAME_CIS_OWNER "owner" +#define ODF_NAME_CIS_CIA "cia" +#define ODF_NAME_CIS_CALL_ID "call_id" +#define ODF_NAME_CIS_USAGE "usage" +#define ODF_NAME_CIS_INDEX "index" +#define ODF_NAME_CIS_FORWARDER_ID "forwarder" + +/* odf processing */ +#define ODF_FILE_NAME_VIRTUAL_BUS "virtual_bus" +#define ODF_NAME_UVB "uvb" +#define ODF_NAME_SECURE "secure" +#define ODF_NAME_DELAY "delay" +#define ODF_NAME_WD "wd" +#define ODF_NAME_OBTAIN "obtain" +#define ODF_NAME_ADDRESS "address" +#define ODF_NAME_BUFFER "buffer" +#define ODF_NAME_SIZE "size" + /* UBRT table info */ -#define ACPI_SIG_UBRT "UBRT" -#define UBRT_UB_CONTROLLER 0 -#define UBRT_UMMU 1 -#define UBRT_UB_MEMORY 2 -#define UBRT_VIRTUAL_BUS 3 -#define UBRT_CALL_ID_SERVICE 4 +#define ACPI_SIG_UBRT "UBRT" +#define UBRT_UB_CONTROLLER 0 +#define UBRT_UMMU 1 +#define UBRT_UB_MEMORY 2 +#define UBRT_VIRTUAL_BUS 3 +#define UBRT_CALL_ID_SERVICE 4 struct ubios_od_value_struct { char *name; @@ -84,7 +128,6 @@ struct ubrt_sub_tables { * @count: count of tables * @sub tables: Sub tables[count] */ - struct ubios_ubrt_table { struct acpi_table_header header; u32 count; diff --git a/drivers/ubios_uvb/odf/odf_trans.c b/drivers/firmware/ubios_uvb/odf/odf_trans.c similarity index 73% rename from drivers/ubios_uvb/odf/odf_trans.c rename to drivers/firmware/ubios_uvb/odf/odf_trans.c index dd5af5dd88bb..ac68c736db20 100644 --- 
a/drivers/ubios_uvb/odf/odf_trans.c +++ b/drivers/firmware/ubios_uvb/odf/odf_trans.c @@ -5,14 +5,14 @@ * Author: zhangrui * Create: 2025-04-18 */ +#define pr_fmt(fmt) "[UVB]: " fmt + #include #include -#include -#include -#include #include -#include -#include "include/odf_trans.h" +#include "cis_uvb_interface.h" +#include "odf_interface.h" +#include "odf_handle.h" MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ODF Api"); @@ -20,6 +20,10 @@ MODULE_DESCRIPTION("ODF Api"); struct cis_info *g_cis_info; EXPORT_SYMBOL(g_cis_info); +struct uvb_info *g_uvb_info; +EXPORT_SYMBOL(g_uvb_info); + +struct ubios_od_root *od_root; void free_cis_info(void) { @@ -47,7 +51,7 @@ static struct cis_group *create_group_from_vs(struct ubios_od_value_struct *vs) status = odf_get_list_from_struct(vs, ODF_NAME_CIS_CALL_ID, &list); if (status) { - pr_err(ERR_PRE "create group: get [call id list] failed, err = %d\n", status); + pr_err("create group: get [call id list] failed, err = %d\n", status); return NULL; } group = kzalloc(sizeof(struct cis_group) + (sizeof(u32) * list.count), GFP_KERNEL); @@ -56,29 +60,29 @@ static struct cis_group *create_group_from_vs(struct ubios_od_value_struct *vs) status = odf_get_u32_from_struct(vs, ODF_NAME_CIS_OWNER, &(group->owner_user_id)); if (status) { - pr_err(ERR_PRE "create group: get [owner id] failed, err = %d\n", status); + pr_err("create group: get [owner id] failed, err = %d\n", status); goto fail; } status = odf_get_u8_from_struct(vs, ODF_NAME_CIS_USAGE, &(group->usage)); if (status) { - pr_err(ERR_PRE "create group: get [usage] failed, err = %d\n", status); + pr_err("create group: get [usage] failed, err = %d\n", status); goto fail; } status = odf_get_u8_from_struct(vs, ODF_NAME_CIS_INDEX, &(group->index)); if (status) - pr_info(LOG_PRE "cis group not get [index], use default value\n"); + pr_info("cis group not get [index], use default value\n"); status = odf_get_u32_from_struct(vs, ODF_NAME_CIS_FORWARDER_ID, &(group->forwarder_id)); if (status) - pr_info(LOG_PRE "cis group not get forwarder, use default value\n"); + pr_info("cis group not get forwarder, use default value\n"); group->cis_count = list.count; for (i = 0; i < list.count; i++) { status = odf_get_u32_from_list(&list, i, &(group->call_id[i])); if (status) { - pr_err(ERR_PRE "create group: get each call id failed, err = %d\n", status); + pr_err("create group: get each call id failed, err = %d\n", status); goto fail; } } @@ -112,11 +116,10 @@ static int create_cis_info_from_odf(void) for (i = 0; i < ubrt_table->count; i++) { if (ubrt_table->sub_tables[i].type == UBRT_CALL_ID_SERVICE) { - pr_info(LOG_PRE "find cis table in ubrt table\n"); header = memremap(ubrt_table->sub_tables[i].pointer, sizeof(struct ubios_od_header), MEMREMAP_WB); if (!header) { - pr_err(ERR_PRE "failed to map cis table to od header in ACPI\n"); + pr_err("failed to map cis table to od header in ACPI\n"); return -ENOMEM; } sub_table_size = header->total_size; @@ -128,16 +131,16 @@ static int create_cis_info_from_odf(void) } if (!sub_table) { - pr_err(ERR_PRE "failed to get cis table address in ACPI\n"); + pr_err("failed to get cis table address in ACPI\n"); return -ENOMEM; } - pr_info(LOG_PRE "get cis sub table suceess\n"); + pr_info("get cis sub table success\n"); err = odf_get_list_from_table(sub_table, ODF_NAME_CIS_GROUP, &list); if (err) { - pr_err(ERR_PRE "create cis info from odf failed, group not found, err = %d\n", + pr_err("create cis info from odf failed, group not found, err = %d\n", err); - goto fail; + goto free_sub_table; } ub_vs_err 
= odf_get_vs_from_table(sub_table, ODF_NAME_CIS_UB, &ub_vs);
@@ -145,7 +148,7 @@
 		err = odf_get_list(od_root, ODF_FILE_NAME_CALL_ID_SERVICE "/" ODF_NAME_CIS_GROUP, &list);
 		if (err) {
-			pr_err(ERR_PRE "create cis info from odf failed, group not found, err = %d\n",
+			pr_err("create cis info from odf failed, group not found, err = %d\n",
 			       err);
 			return err;
 		}
@@ -157,20 +160,20 @@
 	g_cis_info = kzalloc(sizeof(struct cis_info) + (sizeof(void *) * list.count), GFP_KERNEL);
 	if (!g_cis_info) {
 		err = -ENOMEM;
-		goto fail;
+		goto free_sub_table;
 	}
 
 	g_cis_info->group_count = list.count;
 	err = odf_get_data_from_list(&list, 0, &vs);
 	if (err) {
-		pr_err(ERR_PRE "create cis info from odf failed: get data from CIS group failed, err = %d\n",
+		pr_err("create cis info from odf failed: get data from CIS group failed, err = %d\n",
 		       err);
 		goto fail;
 	}
 	for (i = 0; i < list.count; i++) {
 		g_cis_info->groups[i] = create_group_from_vs(&vs);
 		if (!g_cis_info->groups[i]) {
-			pr_err(ERR_PRE "create cis group from odf failed\n");
+			pr_err("create cis group from odf failed\n");
 			err = -ENODATA;
 			goto fail;
 		}
@@ -178,42 +181,40 @@
 	}
 
 	if (!ub_vs_err) {
-		pr_info(LOG_PRE "found ub struct in cis info\n");
+		pr_info("found ub struct in cis info\n");
 		err = odf_get_u8_from_struct(&ub_vs, ODF_NAME_CIS_USAGE, &(g_cis_info->ub.usage));
 		if (err) {
-			pr_err(ERR_PRE "create group: get [usage] failed, err = %d\n", status);
+			pr_err("ub struct: get [usage] failed, err = %d\n", err);
 			goto fail;
 		}
 		err = odf_get_u8_from_struct(&ub_vs, ODF_NAME_CIS_INDEX, &(g_cis_info->ub.index));
 		if (err)
-			pr_warn(LOG_PRE "ub struct not get [index], use default value\n");
+			pr_warn("ub struct has no [index], using default value\n");
 		err = odf_get_u32_from_struct(&ub_vs, ODF_NAME_CIS_FORWARDER_ID,
 					      &(g_cis_info->ub.forwarder_id));
 		if (err)
-			pr_warn(LOG_PRE "ub struct not get forwarder, use default value\n");
+			pr_warn("ub struct has no forwarder, using default value\n");
 	} else
-		pr_warn(LOG_PRE "not found ub struct in cis info\n");
+		pr_warn("ub struct not found in cis info\n");
 
 	if (sub_table)
 		memunmap(sub_table);
-	pr_info(LOG_PRE "get cis table from odf success\n");
+	pr_info("get cis table from odf success\n");
 
 	return 0;
+
+fail:
+	free_cis_info();
+free_sub_table:
 	if (sub_table)
 		memunmap(sub_table);
-	free_cis_info();
-
 	return err;
 }
 
-struct uvb_info *g_uvb_info;
-EXPORT_SYMBOL(g_uvb_info);
-
 static void free_uvb_info(void)
 {
 	u16 i;
@@ -227,10 +228,9 @@ static void free_uvb_info(void)
 			(g_uvb_info)->uvbs[i] = NULL;
 		}
 	}
-	if (g_uvb_info) {
-		kfree(g_uvb_info);
-		g_uvb_info = NULL;
-	}
+
+	kfree(g_uvb_info);
+	g_uvb_info = NULL;
 }
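+
+/*
+ * Sketch of the odf layout consumed below (inferred from the names used
+ * by this parser; the authoritative schema is the UBIOS object
+ * description specification):
+ *
+ *	virtual_bus/uvb       list with one entry per uvb
+ *	  secure, delay       per-uvb scalars
+ *	  wd                  table with one row per window and columns
+ *	                      obtain/address/buffer/size
+ */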
static struct uvb *create_uvb_from_vs(const struct ubios_od_value_struct *vs)
 	status = odf_get_table_from_struct(vs, ODF_NAME_WD, &wd);
 	if (status) {
-		pr_err(ERR_PRE "create uvb info: get [wd] failed, [%d]\n", status);
+		pr_err("create uvb info: get [wd] failed, [%d]\n", status);
 		return NULL;
 	}
 	temp_uvb = kzalloc(sizeof(struct uvb) +
@@ -251,7 +251,7 @@
 		return NULL;
 
 	if (wd.row > UVB_WINDOW_COUNT_MAX) {
-		pr_err(ERR_PRE "create uvb info: uvb window count[%d] error.\n", wd.row);
+		pr_err("create uvb info: uvb window count[%d] error.\n", wd.row);
 		goto fail;
 	}
 	temp_uvb->window_count = (u8)wd.row;
@@ -261,13 +261,13 @@ static struct uvb *create_uvb_from_vs(const struct ubios_od_value_struct *vs)
 		status = odf_get_u64_from_table(&wd, row, ODF_NAME_OBTAIN,
 						&(temp_uvb->wd[row].obtain));
 		if (status) {
-			pr_err(ERR_PRE "create uvb info: get [obtain] failed, %d.\n", status);
+			pr_err("create uvb info: get [obtain] failed, %d.\n", status);
 			goto fail;
 		}
 		status = odf_get_u64_from_table(&wd, row, ODF_NAME_ADDRESS,
 						&(temp_uvb->wd[row].address));
 		if (status) {
-			pr_err(ERR_PRE "create uvb info: get [address] failed, %d.\n", status);
+			pr_err("create uvb info: get [address] failed, %d.\n", status);
 			goto fail;
 		}
 		(void)odf_get_u64_from_table(&wd,
@@ -300,11 +300,10 @@ static int create_uvb_info_from_odf(void)
 	ubrt_table = (struct ubios_ubrt_table *)table;
 	for (i = 0; i < ubrt_table->count; i++) {
 		if (ubrt_table->sub_tables[i].type == UBRT_VIRTUAL_BUS) {
-			pr_info(LOG_PRE "find uvb table in ubrt table\n");
 			header = memremap(ubrt_table->sub_tables[i].pointer,
 					  sizeof(struct ubios_od_header), MEMREMAP_WB);
 			if (!header) {
-				pr_err(ERR_PRE "failed to map uvb table to od header in ACPI\n");
+				pr_err("failed to map uvb table to od header in ACPI\n");
 				return -ENOMEM;
 			}
 			sub_table_size = header->total_size;
@@ -316,20 +315,20 @@ static int create_uvb_info_from_odf(void)
 		}
 
 		if (!sub_table) {
-			pr_err(ERR_PRE "failed to get uvb table address in ACPI\n");
+			pr_err("failed to get uvb table address in ACPI\n");
 			return -ENOMEM;
 		}
-		pr_info(LOG_PRE "get uvb sub table suceess\n");
+		pr_info("get uvb sub table success\n");
 
 		err = odf_get_list_from_table(sub_table, ODF_NAME_UVB, &uvb_list);
 		if (err) {
-			pr_err(ERR_PRE "create uvb info: find uvb from od failed, err = %d\n", err);
-			goto exit;
+			pr_err("create uvb info: find uvb from od failed, err = %d\n", err);
+			goto free_sub_table;
 		}
 	} else {
 		err = odf_get_list(od_root, ODF_FILE_NAME_VIRTUAL_BUS "/" ODF_NAME_UVB, &uvb_list);
 		if (err) {
-			pr_err(ERR_PRE "create uvb info: find uvb from od failed, err = %d\n", err);
+			pr_err("create uvb info: find uvb from od failed, err = %d\n", err);
 			return err;
 		}
 	}
@@ -337,46 +336,44 @@
 	g_uvb_info = kzalloc(sizeof(struct uvb_info) + sizeof(void *) * uvb_list.count, GFP_KERNEL);
 	if (!g_uvb_info) {
 		err = -ENOMEM;
-		goto exit;
+		goto free_sub_table;
 	}
 
 	if (uvb_list.count > UVB_WINDOW_COUNT_MAX) {
-		pr_err(ERR_PRE "create uvb info: uvb count[%d] error.\n", uvb_list.count);
+		pr_err("create uvb info: uvb count[%d] error.\n", uvb_list.count);
 		err = -EOVERFLOW;
-		goto exit;
+		goto fail;
 	}
 	g_uvb_info->uvb_count = (u8)uvb_list.count;
 
 	err = odf_get_data_from_list(&uvb_list, 0, &vs);
 	if (err) {
-		pr_err(ERR_PRE "create uvb info: get uvb failed [%d]\n", err);
-		goto exit;
+		pr_err("create uvb info: get uvb failed [%d]\n", err);
+		goto fail;
	}
 	for (i = 0; i < uvb_list.count; i++) {
 		g_uvb_info->uvbs[i] = create_uvb_from_vs(&vs);
 		if (!g_uvb_info->uvbs[i]) {
-			pr_err(ERR_PRE "create uvb from odf failed\n");
+			pr_err("create uvb from odf failed\n");
 			err = -EINVAL;
-			goto exit;
+			goto fail;
 		}
 		(void)odf_next_in_list(&uvb_list, &vs);
 	}
 
 	if (sub_table)
 		memunmap(sub_table);
-	pr_info(LOG_PRE "get uvb table from odf success\n");
+	pr_info("get uvb table from odf success\n");
 
 	return 0;
 
-exit:
-	if (sub_table)
-		memunmap(sub_table);
+fail:
 	free_uvb_info();
+free_sub_table:
+	if (sub_table)
+		memunmap(sub_table);
 
 	return err;
 }
 
-struct ubios_od_root *od_root;
-EXPORT_SYMBOL(od_root);
-
 static void free_odf_info(void)
 {
 	kfree(od_root);
@@ -396,41 +393,41 @@ static int create_odf_info(void)
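+	/*
+	 * Discovery order implemented below: prefer the ACPI UBRT table;
+	 * if it is absent, fall back to the FDT property "linux,ubiostbl"
+	 * and copy the od root out of the remapped region.
+	 */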
 	status = acpi_get_table(ACPI_SIG_UBRT, 0, &ubrt_header);
 	if (ACPI_SUCCESS(status)) {
-		pr_info(LOG_PRE "Success fully get UBRT table\n");
+		pr_info("Successfully got UBRT table\n");
 		return 0;
 	}
 
 	ret = odf_get_fdt_ubiostbl(&od_root_phys, "linux,ubiostbl");
 	if (ret) {
-		pr_err(ERR_PRE "from fdt get ubiostbl failed\n");
-		goto fail;
+		pr_err("from fdt get ubiostbl failed\n");
+		return -1;
 	}
 
 	od_root_origin = (struct ubios_od_root *)
 			 memremap(od_root_phys, sizeof(struct ubios_od_header), MEMREMAP_WB);
 	if (!od_root_origin) {
-		pr_err(ERR_PRE "od_root header memremap failed, od_root addr=%016llx\n", od_root_phys);
-		goto fail;
+		pr_err("od_root header memremap failed, od_root addr=%016llx\n", od_root_phys);
+		return -1;
 	}
 	od_root_size = od_root_origin->header.total_size;
 	memunmap((void *)od_root_origin);
 
 	od_root_origin = (struct ubios_od_root *)memremap(od_root_phys, od_root_size, MEMREMAP_WB);
 	if (!od_root_origin) {
-		pr_err(ERR_PRE "od_root memremap failed, od_root addr=%016llx\n", od_root_phys);
-		goto fail;
+		pr_err("od_root memremap failed, od_root addr=%016llx\n", od_root_phys);
+		return -1;
 	}
 
 	count = od_root_origin->count;
 	od_root = kzalloc(sizeof(struct ubios_od_root) + count * sizeof(u64), GFP_KERNEL);
 	if (!od_root) {
-		pr_err(ERR_PRE "kmalloc od_root failed\n");
-		goto fail;
+		pr_err("kmalloc od_root failed\n");
+		goto free_od_root;
 	}
 
 	memcpy(&od_root->header, &od_root_origin->header, sizeof(struct ubios_od_header));
 	od_root->count = od_root_origin->count;
 	for (i = 0; i < od_root->count; i++) {
-		if (od_root_origin->odfs[i] == UBIOS_OD_EMPTY)
+		if (!od_root_origin->odfs[i])
 			continue;
 
 		od_root->odfs[i] = od_root_origin->odfs[i];
@@ -439,12 +436,11 @@
 	memunmap(od_root_origin);
 	odf_update_checksum(&od_root->header);
 
-	pr_info(LOG_PRE "get ubios table success\n");
+	pr_info("get ubios table success\n");
 
 	return 0;
 
-fail:
-	free_odf_info();
+free_od_root:
 	if (od_root_origin)
 		memunmap(od_root_origin);
 
@@ -455,28 +451,34 @@ static int __init odf_init(void)
 {
 	int status;
 
-	pr_info(LOG_PRE "start to odf init\n");
 	status = create_odf_info();
 	if (status) {
-		pr_err(ERR_PRE "odf table init failed\n");
+		pr_err("odf table init failed\n");
 		return -1;
 	}
 
 	status = create_cis_info_from_odf();
 	if (status) {
-		pr_err(ERR_PRE "create cis info failed, cis is invalid\n");
-		return -1;
+		pr_err("create cis info failed, cis is invalid\n");
+		goto free_odf;
 	}
 
 	status = create_uvb_info_from_odf();
 	if (status) {
-		pr_err(ERR_PRE "create uvb info failed, uvb is invalid\n");
-		return -1;
+		pr_err("create uvb info failed, uvb is invalid\n");
+		goto free_cis;
 	}
 
-	pr_info(LOG_PRE "odf init success\n");
+	pr_info("odf init success\n");
 
 	return 0;
+
+free_cis:
+	free_cis_info();
+free_odf:
+	free_odf_info();
+
+	return -1;
 }
 
 static void __exit odf_exit(void)
@@ -485,7 +487,7 @@
 	free_cis_info();
 	free_odf_info();
 
-	pr_info(LOG_PRE "odf exit success\n");
+	pr_info("odf exit success\n");
 }
 
 module_init(odf_init);
diff --git a/drivers/ubios_uvb/Makefile b/drivers/ubios_uvb/Makefile
deleted file mode 100644
index 5d710f105572..000000000000
--- a/drivers/ubios_uvb/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
-# Create : 2025-04-18 -# Description : cis odf Makefile - -obj-$(CONFIG_UDFI) += odf/ -obj-$(CONFIG_UDFI) += cis/ diff --git a/drivers/ubios_uvb/cis/cis_core.c b/drivers/ubios_uvb/cis/cis_core.c deleted file mode 100644 index 87df2053af21..000000000000 --- a/drivers/ubios_uvb/cis/cis_core.c +++ /dev/null @@ -1,176 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - * Description: Call ID Service (CIS) core module, manages inter-process communication - * via call identifiers with local/remote handling and UVB integration. - * Author: zhangrui - * Create: 2025-04-18 - */ - -#include -#include -#include - -#include "cis_info_process.h" -#include "uvb_info_process.h" - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Call ID Service Framework"); - -static bool cis_call_for_me(u32 receiver_id) -{ - if ((receiver_id == UBIOS_USER_ID_ALL) || - (receiver_id == ubios_get_user_type(UBIOS_MY_USER_ID)) || - (receiver_id == UBIOS_MY_USER_ID)) { - return true; - } - - return false; -} - -static bool cis_call_for_local(u32 receiver_id) -{ - if ((ubios_get_user_type(receiver_id) == UBIOS_USER_ID_INTERGRATED_UB_DEVICE) || - (ubios_get_user_type(receiver_id) == UBIOS_USER_ID_INTERGRATED_PCIE_DEVICE)) { - return true; - } - - return false; -} - -int cis_call_remote(u32 call_id, u32 sender_id, u32 receiver_id, - struct cis_message *msg, - bool is_sync) -{ - u32 forwarder_id; - u32 exact_receiver_id; - u8 usage; - u8 index; - int res; - struct udfi_para para = { 0 }; - - pr_debug(LOG_PRE "cis remote call: call id %08x, sender id %08x, receiver id %08x\n", - call_id, sender_id, receiver_id); - res = get_cis_group_info(call_id, receiver_id, - &usage, &index, &exact_receiver_id, &forwarder_id); - if (res) { - pr_err(ERR_PRE "can't get group info, call id=%08x, receiver id=%08x\n", - call_id, receiver_id); - return -EOPNOTSUPP; - } - - para.input = msg->input; - para.input_size = msg->input_size; - para.output = msg->output; - para.output_size = msg->p_output_size; - para.message_id = call_id; - para.receiver_id = exact_receiver_id; - para.sender_id = sender_id; - para.forwarder_id = forwarder_id; - - if (usage != CIS_USAGE_UVB) { - pr_err(ERR_PRE "method not supported, call id=%08x, receiver id=%08x, usage=%d\n", - call_id, receiver_id, usage); - return -EOPNOTSUPP; - } - - if (is_sync) - return cis_call_uvb_sync(index, ¶); - - return cis_call_uvb(index, ¶); -} - -/** - * cis_call - Trigger a cis call with given aruguments. - * - * @call_id: call id that identifies which cis call will be triggered. - * @sender_id: user id of sender. - * @receiver_id: user id of receiver. - * @msg: the data that the user needs to transmit. - * @is_sync: whether to use a synchronous interface. - * - * Search for cia (call id attribute) in cis info with given call id and receiver id. - * The `usage` property of cia determines which method to used (uvb/arch call). - * Return 0 if cis call succeeds or communication method is not supported, - * else return cis error code. 
- */ -int cis_call_by_uvb(u32 call_id, u32 sender_id, u32 receiver_id, - struct cis_message *msg, bool is_sync) -{ - int ret; - msg_handler func; - - pr_info(LOG_PRE "cis call: call id %08x, sender id %08x, receiver id %08x\n", - call_id, sender_id, receiver_id); - if (cis_call_for_me(receiver_id) || cis_call_for_local(receiver_id)) { - func = search_local_cis_func(call_id, receiver_id); - if (func) { - ret = func(msg); - if (ret) { - pr_err(ERR_PRE "cis call execute registered cis func failed\n"); - return ret; - } - pr_info(LOG_PRE "cis call execute registered cis func success\n"); - return 0; - } - pr_err(ERR_PRE "can't found cis func for callid=%08x, receiver_id=%08x\n", - call_id, receiver_id); - return -EOPNOTSUPP; - } - - return cis_call_remote(call_id, sender_id, receiver_id, msg, is_sync); -} -EXPORT_SYMBOL(cis_call_by_uvb); - -int cis_module_lock_func(int lock) -{ - if (lock) - return try_module_get(THIS_MODULE) ? 0 : -EINVAL; - - module_put(THIS_MODULE); - - return 0; -} -EXPORT_SYMBOL(cis_module_lock_func); - -static int __init cis_init(void) -{ - int err = 0; - - err = init_cis_table(); - if (err) { - pr_err(ERR_PRE "cis info init failed, err=%d\n", err); - goto fail; - } - - err = init_global_vars(); - if (err) { - pr_err(ERR_PRE "global vars malloc failed, err=%d\n", err); - goto free_global; - } - - err = init_uvb(); - if (err) { - pr_err(ERR_PRE "uvb init failed, err=%d\n", err); - goto fail; - } - - pr_info(LOG_PRE "cis init success\n"); - return 0; -fail: - uninit_uvb(); -free_global: - free_global_vars(); - - return err; -} - -static void __exit cis_exit(void) -{ - uninit_uvb(); - free_global_vars(); - pr_info(LOG_PRE "cis exit success\n"); -} - -module_init(cis_init); -module_exit(cis_exit); diff --git a/drivers/ubios_uvb/cis/cis_info_process.c b/drivers/ubios_uvb/cis/cis_info_process.c deleted file mode 100644 index 285d40fcdae6..000000000000 --- a/drivers/ubios_uvb/cis/cis_info_process.c +++ /dev/null @@ -1,240 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - * Description: Call ID Service (CIS) info processing module, handles CIS init, - * func register/lookup and group info retrieval. 
- * Author: zhangrui - * Create: 2025-04-18 - */ - -#include -#include -#include -#include -#include -#include "cis_info_process.h" - -LIST_HEAD(g_local_cis_list); -DEFINE_SPINLOCK(cis_register_lock); -struct cis_message *io_param_sync; - -int init_cis_table(void) -{ - if (!g_cis_info) { - pr_err(ERR_PRE "failed to get cis info from odf\n"); - return -EOPNOTSUPP; - } - - return 0; -} - -int init_global_vars(void) -{ - io_param_sync = kzalloc(sizeof(struct cis_message), GFP_KERNEL); - if (!io_param_sync) - return -ENOMEM; - - return 0; -} - -void free_global_vars(void) -{ - kfree(io_param_sync); - io_param_sync = NULL; -} - -static bool is_call_id_supported(struct cis_group *group, u32 call_id) -{ - u32 i; - - for (i = 0; i < group->cis_count; i++) { - pr_debug(LOG_PRE "cia call_id: %08x\n", group->call_id[i]); - if (group->call_id[i] == call_id) - return true; - } - - return false; -} - -int get_cis_group_info(u32 call_id, u32 receiver_id, - u8 *usage, u8 *index, - u32 *exact_receiver_id, u32 *forwarder_id) -{ - u32 i; - - if (!g_cis_info) { - pr_err(ERR_PRE "can't get cis_info from odf\n"); - return -EOPNOTSUPP; - } - - for (i = 0; i < g_cis_info->group_count; i++) { - if (receiver_id != g_cis_info->groups[i]->owner_user_id && - receiver_id != ubios_get_user_type(g_cis_info->groups[i]->owner_user_id)) - continue; - if (is_call_id_supported(g_cis_info->groups[i], call_id)) { - *usage = g_cis_info->groups[i]->usage; - *index = g_cis_info->groups[i]->index; - *exact_receiver_id = g_cis_info->groups[i]->owner_user_id; - *forwarder_id = g_cis_info->groups[i]->forwarder_id; - return 0; - } - } - - if (ubios_get_user_type(receiver_id) == UBIOS_USER_ID_UB_DEVICE) { - *usage = g_cis_info->ub.usage; - *index = g_cis_info->ub.index; - *exact_receiver_id = receiver_id; - *forwarder_id = g_cis_info->ub.forwarder_id; - pr_info(LOG_PRE "refresh info, usage=%d, index=%d, forward_id=%08x\n", - *usage, *index, *forwarder_id); - return 0; - } - - pr_err(ERR_PRE "call id: %08x not supported\n", call_id); - - return -EOPNOTSUPP; -} - -/* -Search Call ID Service owned by this component, return the function. -*/ -struct cis_func_node *search_local_cis_func_node(u32 call_id, u32 receiver_id) -{ - struct cis_func_node *cis_node = NULL; - struct cis_func_node *tmp; - - rcu_read_lock(); - list_for_each_entry_rcu(tmp, &g_local_cis_list, link) { - if ((tmp->call_id == call_id) && (tmp->receiver_id == receiver_id)) { - cis_node = tmp; - break; - } - } - rcu_read_unlock(); - - return cis_node; -} - -/* -Search local Call ID Service Functon according Call ID, return the function. -*/ -msg_handler search_local_cis_func(u32 call_id, u32 receiver_id) -{ - struct cis_func_node *cis_node; - - cis_node = search_local_cis_func_node(call_id, receiver_id); - if (cis_node) - return cis_node->func; - - return NULL; -} - -/* -Search Call ID Service owned by this component, return the function. 
-*/ -msg_handler search_my_cis_func(u32 call_id) -{ - return search_local_cis_func(call_id, UBIOS_MY_USER_ID); -} - -/* -Register a Call ID Service -@call_id - UBIOS Interface ID -@receiver_id - UBIOS User ID who own this CIS -@func - Callback function of Call ID -*/ -int register_local_cis_func(u32 call_id, u32 receiver_id, msg_handler func) -{ - struct cis_func_node *p; - unsigned long flags; - - pr_info(LOG_PRE "cis register: call_id[%08x], receiver_id[%08x]\n", call_id, receiver_id); - if (UBIOS_GET_MESSAGE_FLAG(call_id) != UBIOS_CALL_ID_FLAG) { - pr_err(ERR_PRE "register is not uvb call\n"); - return -EINVAL; - } - if (!func) { - pr_err(ERR_PRE "register func is NULL\n"); - return -EINVAL; - } - - /* check is this Call ID already has a funciton */ - if (search_local_cis_func_node(call_id, receiver_id)) { - pr_err(ERR_PRE "cis register: call_id[%08x], receiver_id[%08x], already register func\n", - call_id, receiver_id); - return -EINVAL; - } - - p = kcalloc(1, sizeof(struct cis_func_node), GFP_KERNEL); - if (!p) - return -ENOMEM; - - p->call_id = call_id; - p->receiver_id = receiver_id; - p->func = func; - - spin_lock_irqsave(&cis_register_lock, flags); - list_add_tail_rcu(&p->link, &g_local_cis_list); - spin_unlock_irqrestore(&cis_register_lock, flags); - pr_info(LOG_PRE "register cis func success\n"); - - return 0; -} -EXPORT_SYMBOL(register_local_cis_func); - -/* -Register a Call ID Service owned by this component -@call_id - UBIOS Interface ID -@func - Callback function of Call ID -*/ -int register_my_cis_func(u32 call_id, msg_handler func) -{ - return register_local_cis_func(call_id, UBIOS_MY_USER_ID, func); -} -EXPORT_SYMBOL(register_my_cis_func); - - -/* -Unregister a Call ID Service -@call_id - UBIOS Interface ID -@receiver_id - UBIOS User ID who own this CIS -*/ -int unregister_local_cis_func(u32 call_id, u32 receiver_id) -{ - struct cis_func_node *p; - unsigned long flags; - - pr_info(LOG_PRE "cis unregister: call_id[%08x], receiver_id[%08x]\n", call_id, receiver_id); - if (UBIOS_GET_MESSAGE_FLAG(call_id) != UBIOS_CALL_ID_FLAG) { - pr_err(ERR_PRE "register is not uvb call\n"); - return -EINVAL; - } - - p = search_local_cis_func_node(call_id, receiver_id); - if (!p) { - pr_err(ERR_PRE "cis unregister: call_id[%08x], receiver_id[%08x] not find func node.\n", - call_id, receiver_id); - return -EINVAL; - } - - spin_lock_irqsave(&cis_register_lock, flags); - list_del_rcu(&p->link); - spin_unlock_irqrestore(&cis_register_lock, flags); - synchronize_rcu(); - - kfree(p); - pr_info(LOG_PRE "unregister cis func success\n"); - - return 0; -} -EXPORT_SYMBOL(unregister_local_cis_func); - -/* -Unregister a Call ID Service owned by this component -@call_id - UBIOS Interface ID -*/ -int unregister_my_cis_func(u32 call_id) -{ - return unregister_local_cis_func(call_id, UBIOS_MY_USER_ID); -} -EXPORT_SYMBOL(unregister_my_cis_func); diff --git a/drivers/ubios_uvb/cis/io_param.c b/drivers/ubios_uvb/cis/io_param.c deleted file mode 100644 index a38bf37ec450..000000000000 --- a/drivers/ubios_uvb/cis/io_param.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - * Description: CIS message processing, handles output preparation. 
- * Author: zhangrui - * Create: 2025-04-18 - */ - -#include -#include -#include "io_param.h" - -void ubios_uvb_free_io_param(struct cis_message *param, u8 free_flag) -{ - if (free_flag == 1 && param->input) - kfree(param->input); - if (free_flag == 1 && param->output) - kfree(param->output); - if (free_flag == 0 && param->input) - memunmap(param->input); - - kfree(param); -} - -void ubios_prepare_output_data(struct cis_message *io_param, void *output, u32 *output_size) -{ - memcpy(output, io_param->output, *(io_param->p_output_size)); - *output_size = *(io_param->p_output_size); -} diff --git a/drivers/ubios_uvb/cis/uvb_info_process.h b/drivers/ubios_uvb/cis/uvb_info_process.h deleted file mode 100644 index dd2b758af1e7..000000000000 --- a/drivers/ubios_uvb/cis/uvb_info_process.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - * Description: uvb info process header - * Author: zhangrui - * Create: 2025-04-18 - */ - -#ifndef UVB_INFO_PROCESS_H -#define UVB_INFO_PROCESS_H - -#include -#include -#include - -#include "cis_uvb_interface.h" - -#define CIS_USAGE_UVB 2 - -#define UVB_POLL_TIME_INTERVAL (100) /* 100us */ -#define UVB_POLL_TIMEOUT (1000) /* 1000ms */ -#define UVB_TIMEOUT_WINDOW_OBTAIN (10000) /* 10000ms */ -#define UVB_POLL_TIMEOUT_TIMES (10000) /* 10000 times */ - -int init_uvb(void); -void uninit_uvb(void); -void uninit_uvb_sync(void); - -#define MAX_UVB_LOCK_IN_BITS 8 -struct uvb_window_lock { - atomic_t lock; - u64 window_address; - struct hlist_node node; -}; -extern DECLARE_HASHTABLE(uvb_lock_table, MAX_UVB_LOCK_IN_BITS); - -struct uvb_window_description *uvb_occupy_window(struct uvb *uvb, u32 sender_id, u64 *wd_obtain); -int uvb_free_window(struct uvb_window *window); -int uvb_fill_window(struct uvb_window_description *wd, struct uvb_window *wd_addr, - struct cis_message *io_params, struct udfi_para *para); -int uvb_poll_window_call(struct uvb_window *window, u32 call_id); -int uvb_poll_window_call_sync(struct uvb_window *window, u32 call_id); -int uvb_get_output_data(struct uvb_window *window, - struct cis_message *io_param, void *output, u32 *output_size); - -/* cis call by uvb */ -int cis_call_uvb(u8 index, struct udfi_para *para); -int cis_call_uvb_sync(u8 index, struct udfi_para *para); -#endif diff --git a/drivers/ubios_uvb/odf/Makefile b/drivers/ubios_uvb/odf/Makefile deleted file mode 100644 index 3c76e02dee82..000000000000 --- a/drivers/ubios_uvb/odf/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. -# Create : 2025-04-18 -# Description : odf Makefile - -obj-y = odf_get_fdt.o - -obj-$(CONFIG_UDFI_ODF) += odf.o -odf-objs := odf_trans.o odf_data.o odf_file.o odf_helper.o - -ccflags-y += -I$(srctree)/$(src)/../include diff --git a/drivers/ubios_uvb/odf/include/libodf.h b/drivers/ubios_uvb/odf/include/libodf.h deleted file mode 100644 index 74e9b98bf878..000000000000 --- a/drivers/ubios_uvb/odf/include/libodf.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
- * Description: libodf header - * Author: zhangrui - * Create: 2025-04-18 - */ -#ifndef LIBODF_H -#define LIBODF_H -#include "cis_uvb_interface.h" -#include "libodf_handle.h" - -#define UBIOS_OD_NAME_LEN_MAX 16 -#define UBIOS_OD_VERSION 1 -#define UBIOS_OD_EMPTY 0 - -#define UBIOS_OD_TYPE_U8 0x1 -#define UBIOS_OD_TYPE_U16 0x2 -#define UBIOS_OD_TYPE_U32 0x3 -#define UBIOS_OD_TYPE_U64 0x4 -#define UBIOS_OD_TYPE_S8 0x5 -#define UBIOS_OD_TYPE_S16 0x6 -#define UBIOS_OD_TYPE_S32 0x7 -#define UBIOS_OD_TYPE_S64 0x8 -#define UBIOS_OD_TYPE_BOOL 0x10 -#define UBIOS_OD_TYPE_CHAR 0x20 -#define UBIOS_OD_TYPE_STRING 0x21 -#define UBIOS_OD_TYPE_STRUCT 0x30 -#define UBIOS_OD_TYPE_TABLE 0x40 -#define UBIOS_OD_TYPE_FILE 0x50 -#define UBIOS_OD_TYPE_LIST 0x80 - -#define UBIOS_OD_ROOT_NAME "root_table" - -#define UBIOS_OD_INVALID_INDEX 0xFFFF - -#define UBIOS_OD_PATH_SEPARATOR '/' -#endif diff --git a/drivers/ubios_uvb/odf/include/odf_trans.h b/drivers/ubios_uvb/odf/include/odf_trans.h deleted file mode 100644 index 1d03979ee8f5..000000000000 --- a/drivers/ubios_uvb/odf/include/odf_trans.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - * Description: odf trans header - * Author: zhangrui - * Create: 2025-04-18 - */ - -#ifndef ODF_TRANS_H -#define ODF_TRANS_H -#include "libodf.h" - -#define ODF_FILE_NAME_CALL_ID_SERVICE "call_id_service" -#define ODF_NAME_CIS_GROUP "group" -#define ODF_NAME_CIS_UB "ub" -#define ODF_NAME_CIS_OWNER "owner" -#define ODF_NAME_CIS_CIA "cia" -#define ODF_NAME_CIS_CALL_ID "call_id" -#define ODF_NAME_CIS_USAGE "usage" -#define ODF_NAME_CIS_INDEX "index" -#define ODF_NAME_CIS_FORWARDER_ID "forwarder" - -/* odf processing */ -#define ODF_FILE_NAME_VIRTUAL_BUS "virtual_bus" -#define ODF_NAME_UVB "uvb" -#define ODF_NAME_SECURE "secure" -#define ODF_NAME_DELAY "delay" -#define ODF_NAME_WD "wd" -#define ODF_NAME_OBTAIN "obtain" -#define ODF_NAME_ADDRESS "address" -#define ODF_NAME_BUFFER "buffer" -#define ODF_NAME_SIZE "size" - -#endif diff --git a/drivers/ubios_uvb/odf/odf_file.c b/drivers/ubios_uvb/odf/odf_file.c deleted file mode 100644 index a37d65b05242..000000000000 --- a/drivers/ubios_uvb/odf/odf_file.c +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - * Description: ODF file validation and retrieval functions - * Author: zhangrui - * Create: 2025-04-18 - */ -#include -#include -#include "include/libodf.h" - -bool is_od_root_valid(struct ubios_od_root *root) -{ - if (!root) { - pr_err(ERR_PRE "odf: root is NULL\n"); - return false; - } - - if (!odf_is_checksum_ok(&(root->header))) { - pr_err(ERR_PRE "odf: root checksum error.\n"); - return false; - } - - if (strcmp(root->header.name, UBIOS_OD_ROOT_NAME)) { - pr_err(ERR_PRE "odf: root name[%s] mismatch\n", root->header.name); - return false; - } - - return true; -} - -bool is_od_file_valid(u8 *file) -{ - struct ubios_od_header *header = (struct ubios_od_header *)file; - - if (!header) { - pr_err(ERR_PRE "odf: file is NULL\n"); - return false; - } - - if (!odf_is_checksum_ok(header)) { - pr_err(ERR_PRE "odf: file checksum error.\n"); - return false; - } - - return true; -} - -/** -@brief Search all pointer in od root, return the specific od file matched the input name. -@param[in] root start of od root -@param[in] name name of od -@return -@retval = NULL, not found. -@retval != NULL, found. 
-*/ -u8 *odf_get_od_file(struct ubios_od_root *root, char *name) -{ - u64 i; - - if (!is_od_root_valid(root)) - return NULL; - - if (!name) - return NULL; - - for (i = 0; i < root->count; i++) { - if (root->odfs[i] == UBIOS_OD_EMPTY) - continue; - - if (strcmp(name, (char *)(u64)root->odfs[i]) == 0) - return (u8 *)(u64)root->odfs[i]; - } - - return NULL; -} diff --git a/include/ubios/cis.h b/include/linux/firmware/ubios/cis.h similarity index 92% rename from include/ubios/cis.h rename to include/linux/firmware/ubios/cis.h index d8d48c9b5762..a8418a7b53a0 100644 --- a/include/ubios/cis.h +++ b/include/linux/firmware/ubios/cis.h @@ -41,13 +41,11 @@ struct cis_message { // cis call int cis_call_by_uvb(u32 call_id, u32 sender_id, u32 receiver_id, struct cis_message *msg, bool is_sync); +int uvb_polling_sync(void *data); // cis register typedef int (*msg_handler)(struct cis_message *msg); int register_local_cis_func(u32 call_id, u32 receiver_id, msg_handler func); -int register_my_cis_func(u32 call_id, msg_handler func); int unregister_local_cis_func(u32 call_id, u32 receiver_id); -int unregister_my_cis_func(u32 call_id); -int cis_module_lock_func(int lock); #endif -- Gitee From 27adc37064a12cb94c8e265382a76de7c8b5b407 Mon Sep 17 00:00:00 2001 From: Junlong Zheng Date: Tue, 28 Oct 2025 19:14:38 +0800 Subject: [PATCH 05/48] ub:ubus: Support for UB Memory Decoder general layer implementation commit fa547b73756dd4755d0258824a230b31f9ea7b4f openEuler Supports triggering the memory decoder's flush function via ubus driver and implements RAS processing. Signed-off-by: Yuhao Xiang Signed-off-by: Junlong Zheng Signed-off-by: yuhao_zhang --- drivers/ub/ubus/Makefile | 2 +- drivers/ub/ubus/memory.c | 186 ++++++++++++++++++++++++++++++ drivers/ub/ubus/memory.h | 59 ++++++++++ drivers/ub/ubus/ubus_controller.h | 4 + drivers/ub/ubus/ubus_entity.c | 3 + include/ub/ubus/ub-mem-decoder.h | 100 ++++++++++++++++ include/ub/ubus/ubus.h | 3 + 7 files changed, 356 insertions(+), 1 deletion(-) create mode 100644 drivers/ub/ubus/memory.c create mode 100644 drivers/ub/ubus/memory.h create mode 100644 include/ub/ubus/ub-mem-decoder.h diff --git a/drivers/ub/ubus/Makefile b/drivers/ub/ubus/Makefile index 7456bdb0a787..59505977dd2f 100644 --- a/drivers/ub/ubus/Makefile +++ b/drivers/ub/ubus/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_UB_UBUS) += msi/ ubus-y := ubus_driver.o sysfs.o ubus_controller.o msg.o ubus_config.o port.o cc.o eid.o cna.o route.o ubus-y += enum.o resource.o ubus_entity.o reset.o cap.o interrupt.o decoder.o omm.o ioctl.o eu.o link.o -ubus-y += instance.o pool.o +ubus-y += instance.o pool.o memory.o ubus-y += services/ras.o services/service.o services/gucd.o ubus-y += services/hotplug/hotplug_core.o services/hotplug/hotplug_ctrl.o diff --git a/drivers/ub/ubus/memory.c b/drivers/ub/ubus/memory.c new file mode 100644 index 000000000000..e7b3144db4bd --- /dev/null +++ b/drivers/ub/ubus/memory.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved. 
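+ * Description: UB memory decoder general layer: decoder create/remove,
+ *              drain control, USI wiring and RAS handler plumbing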
+ */ + +#define pr_fmt(fmt) "ubus memory: " fmt + +#include +#include "ubus.h" +#include "ubus_controller.h" +#include "memory.h" + +static ubmem_ras_handler handler; + +static bool ub_mem_uent_valid(struct ub_entity *uent) +{ + if (!uent || !uent->ubc) + return false; + + return is_ibus_controller(uent); +} + +void ub_mem_decoder_init(struct ub_entity *uent) +{ + struct ub_bus_controller *ubc; + int ret; + + if (!ub_mem_uent_valid(uent)) + return; + + ubc = uent->ubc; + if (ubc->ops && ubc->ops->mem_decoder_create) { + ret = ubc->ops->mem_decoder_create(ubc); + WARN_ON(ret); + } else { + dev_warn(&ubc->dev, + "ubc ops or ubc ops mem_decoder_create is null.\n"); + } +} + +void ub_mem_decoder_uninit(struct ub_entity *uent) +{ + struct ub_bus_controller *ubc; + + if (!ub_mem_uent_valid(uent)) + return; + + ubc = uent->ubc; + if (ubc->ops && ubc->ops->mem_decoder_remove) + ubc->ops->mem_decoder_remove(ubc); + else + dev_warn(&ubc->dev, "ubc ops mem_decoder_remove is null.\n"); +} + +void ub_mem_ras_handler_register(ubmem_ras_handler rh) +{ + handler = rh; +} +EXPORT_SYMBOL_GPL(ub_mem_ras_handler_register); + +void ub_mem_ras_handler_unregister(void) +{ + handler = NULL; +} +EXPORT_SYMBOL_GPL(ub_mem_ras_handler_unregister); + +ubmem_ras_handler ub_mem_ras_handler_get(void) +{ + return handler; +} +EXPORT_SYMBOL_GPL(ub_mem_ras_handler_get); + +void ub_mem_init_usi(struct ub_entity *uent) +{ + if (!uent->ubc) { + pr_err("ubc not exist, can't init usi\n"); + return; + } + + if (uent->ubc->ops && uent->ubc->ops->register_ubmem_irq) + uent->ubc->ops->register_ubmem_irq(uent->ubc); + else + dev_warn(&uent->ubc->dev, "ubc ops register_ubmem_irq is null.\n"); +} + +void ub_mem_uninit_usi(struct ub_entity *uent) +{ + if (!uent->ubc) { + pr_err("ubc not exist, can't uninit usi\n"); + return; + } + + if (uent->ubc->ops && uent->ubc->ops->unregister_ubmem_irq) + uent->ubc->ops->unregister_ubmem_irq(uent->ubc); + else + dev_warn(&uent->ubc->dev, "ubc ops unregister_ubmem_irq is null.\n"); +} + +void ub_mem_drain_start(u32 scna) +{ + struct ub_mem_device *mem_device; + struct ub_bus_controller *ubc; + + ubc = ub_find_bus_controller_by_cna(scna); + if (!ubc) { + pr_err("No ubc has cna of %u\n", scna); + return; + } + + mem_device = ubc->mem_device; + if (!mem_device) { + dev_err(&ubc->dev, "ubc mem_device is null.\n"); + return; + } + + if (mem_device->ops && mem_device->ops->mem_drain_start) + mem_device->ops->mem_drain_start(mem_device); + else + dev_warn(mem_device->dev, "ub mem_device ops mem_drain_start is null.\n"); +} +EXPORT_SYMBOL_GPL(ub_mem_drain_start); + +int ub_mem_drain_state(u32 scna) +{ + struct ub_mem_device *mem_device; + struct ub_bus_controller *ubc; + + ubc = ub_find_bus_controller_by_cna(scna); + if (!ubc) { + pr_err("No ubc has cna of %u\n", scna); + return -ENODEV; + } + + mem_device = ubc->mem_device; + if (!mem_device) { + dev_err(&ubc->dev, "ubc mem_device is null.\n"); + return -EINVAL; + } + + if (mem_device->ops && mem_device->ops->mem_drain_state) + return mem_device->ops->mem_drain_state(mem_device); + + dev_warn(mem_device->dev, "ub memory decoder ops mem_drain_state is null.\n"); + return 0; +} +EXPORT_SYMBOL_GPL(ub_mem_drain_state); + +int ub_mem_get_numa_id(u32 scna) +{ + struct ub_bus_controller *ubc; + + ubc = ub_find_bus_controller_by_cna(scna); + if (!ubc) { + pr_err("No ubc has cna of %u\n", scna); + return NUMA_NO_NODE; + } + + return pxm_to_node(ubc->attr.proximity_domain); +} +EXPORT_SYMBOL_GPL(ub_mem_get_numa_id); + +bool ub_memory_validate_pa(u32 scna, u64 
pa_start, u64 pa_end, bool cacheable)
+{
+	struct ub_mem_device *mem_device;
+	struct ub_bus_controller *ubc;
+
+	ubc = ub_find_bus_controller_by_cna(scna);
+	if (!ubc) {
+		pr_err("No ubc has cna of %u\n", scna);
+		return false;
+	}
+
+	mem_device = ubc->mem_device;
+	if (!mem_device) {
+		dev_err(&ubc->dev, "ubc mem_device is null.\n");
+		return false;
+	}
+
+	if (mem_device->ops && mem_device->ops->mem_validate_pa)
+		return mem_device->ops->mem_validate_pa(ubc, pa_start, pa_end,
+							cacheable);
+
+	dev_warn(mem_device->dev, "ub memory decoder ops mem_validate_pa is null.\n");
+	return false;
+}
+EXPORT_SYMBOL_GPL(ub_memory_validate_pa);
diff --git a/drivers/ub/ubus/memory.h b/drivers/ub/ubus/memory.h
new file mode 100644
index 000000000000..7c841b466f3e
--- /dev/null
+++ b/drivers/ub/ubus/memory.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved.
+ */
+
+#ifndef __MEMORY_H__
+#define __MEMORY_H__
+
+#include 
+#include 
+#include 
+#include 
+
+#define MAX_RAS_ERROR_SOURCES_CNT 256
+
+void ub_mem_decoder_init(struct ub_entity *uent);
+void ub_mem_decoder_uninit(struct ub_entity *uent);
+void ub_mem_init_usi(struct ub_entity *uent);
+void ub_mem_uninit_usi(struct ub_entity *uent);
+
+struct ub_mem_ras_err_info {
+	enum ras_err_type type;
+	u64 hpa;
+};
+
+struct ub_mem_ras_ctx {
+	DECLARE_KFIFO(ras_fifo, struct ub_mem_ras_err_info,
+		      MAX_RAS_ERROR_SOURCES_CNT);
+};
+
+struct ub_mem_device_ops {
+	void (*mem_drain_start)(struct ub_mem_device *mem_device);
+	int (*mem_drain_state)(struct ub_mem_device *mem_device);
+	bool (*mem_validate_pa)(struct ub_bus_controller *ubc, u64 pa_start,
+				u64 pa_end, bool cacheable);
+
+	KABI_RESERVE(1)
+	KABI_RESERVE(2)
+	KABI_RESERVE(3)
+	KABI_RESERVE(4)
+	KABI_RESERVE(5)
+	KABI_RESERVE(6)
+	KABI_RESERVE(7)
+	KABI_RESERVE(8)
+};
+
+struct ub_mem_device {
+	struct device *dev;
+	struct ub_entity *uent;
+	struct ub_mem_ras_ctx ras_ctx;
+	int ubmem_irq_num;
+	const struct ub_mem_device_ops *ops;
+	void *priv_data;
+
+	KABI_RESERVE(1)
+	KABI_RESERVE(2)
+};
+
+#endif /* __MEMORY_H__ */
diff --git a/drivers/ub/ubus/ubus_controller.h b/drivers/ub/ubus/ubus_controller.h
index ae31e7c45238..4b3c7a74a414 100644
--- a/drivers/ub/ubus/ubus_controller.h
+++ b/drivers/ub/ubus/ubus_controller.h
@@ -11,6 +11,10 @@ struct ub_bus_controller_ops {
 	int (*eu_table_init)(struct ub_bus_controller *ubc);
 	void (*eu_table_uninit)(struct ub_bus_controller *ubc);
 	int (*eu_cfg)(struct ub_bus_controller *ubc, bool flag, u32 eid, u16 upi);
+	int (*mem_decoder_create)(struct ub_bus_controller *ubc);
+	void (*mem_decoder_remove)(struct ub_bus_controller *ubc);
+	void (*register_ubmem_irq)(struct ub_bus_controller *ubc);
+	void (*unregister_ubmem_irq)(struct ub_bus_controller *ubc);
 	void (*register_decoder_base_addr)(struct ub_bus_controller *ubc,
 					   u64 *cmd_queue, u64 *event_queue);
 	int (*entity_enable)(struct ub_entity *uent, u8 enable);
diff --git a/drivers/ub/ubus/ubus_entity.c b/drivers/ub/ubus/ubus_entity.c
index dc9bccff9044..fcea27373ccb 100644
--- a/drivers/ub/ubus/ubus_entity.c
+++ b/drivers/ub/ubus/ubus_entity.c
@@ -17,6 +17,7 @@
 #include "eid.h"
 #include "cna.h"
 #include "resource.h"
+#include "memory.h"
 #include "ubus_controller.h"
 #include "ubus_driver.h"
 #include "ubus_inner.h"
@@ -438,6 +439,7 @@ void ub_start_ent(struct ub_entity *uent)
 	WARN_ON(ret);
 
 	ub_create_sysfs_dev_files(uent);
+	ub_mem_decoder_init(uent);
 
 	if (!((is_p_device(uent) || is_p_idevice(uent)) && is_dynamic(uent->bi))) {
uent->match_driver = true; @@ -531,6 +533,7 @@ void ub_remove_ent(struct ub_entity *uent) list_del(&uent->node); up_write(&ub_bus_sem); + ub_mem_decoder_uninit(uent); ub_uninit_capabilities(uent); ub_unconfigure_ent(uent); ub_entity_unset_mmio(uent); diff --git a/include/ub/ubus/ub-mem-decoder.h b/include/ub/ubus/ub-mem-decoder.h new file mode 100644 index 000000000000..56ba2bed34b0 --- /dev/null +++ b/include/ub/ubus/ub-mem-decoder.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved. + */ + +#ifndef _UB_UBUS_UB_MEM_DECODER_H_ +#define _UB_UBUS_UB_MEM_DECODER_H_ + +#include + +enum ras_err_type { + UB_MEM_ATOMIC_DATA_ERR = 0, + UB_MEM_READ_DATA_ERR, + UB_MEM_FLOW_POISON, + UB_MEM_FLOW_READ_AUTH_POISON, + UB_MEM_FLOW_READ_AUTH_RESPERR, + UB_MEM_TIMEOUT_POISON, + UB_MEM_TIMEOUT_RESPERR, + UB_MEM_READ_DATA_POISON, + UB_MEM_READ_DATA_RESPERR, + MAR_NOPORT_VLD_INT_ERR, + MAR_FLUX_INT_ERR, + MAR_WITHOUT_CXT_ERR, + RSP_BKPRE_OVER_TIMEOUT_ERR, + MAR_NEAR_AUTH_FAIL_ERR, + MAR_FAR_AUTH_FAIL_ERR, + MAR_TIMEOUT_ERR, + MAR_ILLEGAL_ACCESS_ERR, + REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR, +}; + +typedef int (*ubmem_ras_handler)(u64, enum ras_err_type); + +#ifdef CONFIG_UB_UBUS + +/* + * ub_mem_ras_handler_register - register ub memory ras handler for OBMM + * @handler: OBMM ras handler + */ +void ub_mem_ras_handler_register(ubmem_ras_handler handler); + +/* + * ub_mem_ras_handler_unregister - unregister ub memory ras handler for OBMM + */ +void ub_mem_ras_handler_unregister(void); + +/* + * ub_mem_ras_handler_get - get ub memory ras handler + * RETURN VALUE: ubmem_ras_handler + */ +ubmem_ras_handler ub_mem_ras_handler_get(void); + +/* + * ub_mem_drain_start - start ub memory drain + * @scna: source cna + */ +void ub_mem_drain_start(u32 scna); + +/* + * ub_mem_drain_state - whether ub memory drain has been finished + * @scna: source cna + * RETURN VALUE: + * 0 if drain not finish; 1 if drain finish + * other if failed. 
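+ *	(negative errno: -ENODEV if no controller owns @scna,
+ *	-EINVAL if the controller has no mem_device)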
+ */ +int ub_mem_drain_state(u32 scna); + +/* + * ub_mem_get_numa_id - get ubc numa id from scna + * @scna: source cna + * RETURN VALUE: + * numa id + */ +int ub_mem_get_numa_id(u32 scna); + +/* + * ub_memory_validate_pa - Determine whether hpa is valid + * @scna: source cna + * @pa_start: hpa start address + * @pa_end: hpa end address + * @cacheable: cacheable flag + * RETURN VALUE: + * true if hpa is valid + * false if hpa is invalid + */ +bool ub_memory_validate_pa(u32 scna, u64 pa_start, u64 pa_end, bool cacheable); + +#else /* CONFIG_UB_UBUS is not enabled */ +static inline void ub_mem_ras_handler_register(ubmem_ras_handler handler) {} +static inline void ub_mem_ras_handler_unregister(void) {} +static inline ubmem_ras_handler ub_mem_ras_handler_get(void) { return NULL; } +static inline void ub_mem_drain_start(u32 scna) {} +static inline int ub_mem_drain_state(u32 scna) { return -EINVAL; } +static inline int ub_mem_get_numa_id(u32 scna) { return NUMA_NO_NODE; } +static inline bool ub_memory_validate_pa(u32 scna, u64 pa_start, u64 pa_end, + bool cacheable) +{ return false; } +#endif /* CONFIG_UB_UBUS */ + +#endif /* _UB_UBUS_UB_MEM_DECODER_H_ */ diff --git a/include/ub/ubus/ubus.h b/include/ub/ubus/ubus.h index 13f3b6b2ce3b..a81d652a18ff 100644 --- a/include/ub/ubus/ubus.h +++ b/include/ub/ubus/ubus.h @@ -422,6 +422,9 @@ struct ub_bus_controller { struct ub_bus_instance *bi; struct ub_bus_instance *cluster_bi; + /* ub memory decoder */ + struct ub_mem_device *mem_device; + void *data; struct dentry *debug_root; -- Gitee From 3475dc1c60ec3a2d02995bf05eb0fcb5e5c8fb85 Mon Sep 17 00:00:00 2001 From: Junlong Zheng Date: Wed, 29 Oct 2025 09:34:25 +0800 Subject: [PATCH 06/48] ub:hisi-ubus: Support for UB Memory Decoder Initialization commit d392ae30c331be4fde2c2f9aa5e649aa1071c28c openEuler Initialize the UB memory decoder, and implement the corresponding flush trigger, flush status query, and address validity verification interfaces. 
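A minimal consumer-side sketch of the intended drain flow follows; the
helper name and the polling bounds are illustrative assumptions and are
not part of this patch:

    #include <linux/delay.h>
    #include <linux/errno.h>
    #include <ub/ubus/ub-mem-decoder.h>

    /* Trigger a drain on the controller owning @scna and poll until the
     * hardware reports completion (ub_mem_drain_state() returns 1).
     */
    static int example_drain_and_wait(u32 scna)
    {
            int state, retries = 100;       /* illustrative upper bound */

            ub_mem_drain_start(scna);
            while (retries--) {
                    state = ub_mem_drain_state(scna);
                    if (state < 0)          /* -ENODEV/-EINVAL: lookup failed */
                            return state;
                    if (state == 1)         /* every decoder reports drain done */
                            return 0;
                    usleep_range(1000, 2000);
            }
            return -ETIMEDOUT;
    }
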
Signed-off-by: Yuhao Xiang Signed-off-by: Junlong Zheng Signed-off-by: yuhao_zhang --- drivers/ub/ubus/vendor/hisilicon/Makefile | 2 +- drivers/ub/ubus/vendor/hisilicon/controller.c | 2 + drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h | 2 + drivers/ub/ubus/vendor/hisilicon/memory.c | 196 ++++++++++++++++++ 4 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 drivers/ub/ubus/vendor/hisilicon/memory.c diff --git a/drivers/ub/ubus/vendor/hisilicon/Makefile b/drivers/ub/ubus/vendor/hisilicon/Makefile index 653de75f2539..998c0e09aeef 100644 --- a/drivers/ub/ubus/vendor/hisilicon/Makefile +++ b/drivers/ub/ubus/vendor/hisilicon/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ hisi_ubus-objs := hisi-ubus.o controller.o vdm.o local-ras.o msg.o msg-core.o -hisi_ubus-objs += msg-debugfs.o eu-table.o +hisi_ubus-objs += msg-debugfs.o eu-table.o memory.o obj-$(CONFIG_UB_HISI_UBUS) += hisi_ubus.o diff --git a/drivers/ub/ubus/vendor/hisilicon/controller.c b/drivers/ub/ubus/vendor/hisilicon/controller.c index 52d4086d39b2..e5f806abc0e9 100644 --- a/drivers/ub/ubus/vendor/hisilicon/controller.c +++ b/drivers/ub/ubus/vendor/hisilicon/controller.c @@ -17,6 +17,8 @@ static struct ub_bus_controller_ops hi_ubc_ops = { .eu_table_init = hi_eu_table_init, .eu_table_uninit = hi_eu_table_uninit, .eu_cfg = hi_eu_cfg, + .mem_decoder_create = hi_mem_decoder_create, + .mem_decoder_remove = hi_mem_decoder_remove, .register_decoder_base_addr = hi_register_decoder_base_addr, .entity_enable = hi_send_entity_enable_msg, }; diff --git a/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h b/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h index ae86a0e22ff1..119efd37b407 100644 --- a/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h +++ b/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h @@ -35,6 +35,8 @@ struct hi_ubc_private_data { int hi_eu_table_init(struct ub_bus_controller *ubc); void hi_eu_table_uninit(struct ub_bus_controller *ubc); int hi_eu_cfg(struct ub_bus_controller *ubc, bool add, u32 eid, u16 upi); +int hi_mem_decoder_create(struct ub_bus_controller *ubc); +void hi_mem_decoder_remove(struct ub_bus_controller *ubc); void hi_register_decoder_base_addr(struct ub_bus_controller *ubc, u64 *cmd_queue, u64 *event_queue); int hi_send_entity_enable_msg(struct ub_entity *uent, u8 enable); diff --git a/drivers/ub/ubus/vendor/hisilicon/memory.c b/drivers/ub/ubus/vendor/hisilicon/memory.c new file mode 100644 index 000000000000..505ba7dca4fb --- /dev/null +++ b/drivers/ub/ubus/vendor/hisilicon/memory.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved. 
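+ * Description: HiSilicon backend for the UB memory decoder: drain
+ *              trigger/state query and host physical address validation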
+ */ + +#define pr_fmt(fmt) "ubus hisi memory: " fmt + +#include +#include + +#include "../../ubus.h" +#include "../../memory.h" +#include "hisi-ubus.h" + +#define DRAIN_ENABLE_REG_OFFSET 0x24 +#define DRAIN_STATE_REG_OFFSET 0x28 + +#define hpa_gen(addr_h, addr_l) (((u64)(addr_h) << 32) | (addr_l)) + +struct ub_mem_decoder { + struct device *dev; + struct ub_entity *uent; + void *base_reg; +}; + +static bool hi_mem_validate_pa(struct ub_bus_controller *ubc, + u64 pa_start, u64 pa_end, bool cacheable); + +static void hi_mem_drain_start(struct ub_mem_device *mem_device) +{ + struct ub_mem_decoder *decoder, *data = mem_device->priv_data; + + if (!data) { + dev_err(mem_device->dev, "ubc mem_decoder is null.\n"); + return; + } + + for (int i = 0; i < MEM_INFO_NUM; i++) { + decoder = &data[i]; + writel(0, decoder->base_reg + DRAIN_ENABLE_REG_OFFSET); + writel(1, decoder->base_reg + DRAIN_ENABLE_REG_OFFSET); + } +} + +static int hi_mem_drain_state(struct ub_mem_device *mem_device) +{ + struct ub_mem_decoder *decoder, *data = mem_device->priv_data; + int val = 0; + + if (!data) { + dev_err(mem_device->dev, "ubc mem_decoder is null.\n"); + return 0; + } + + for (int i = 0; i < MEM_INFO_NUM; i++) { + decoder = &data[i]; + val = readb(decoder->base_reg + DRAIN_STATE_REG_OFFSET) & 0x1; + dev_info_ratelimited(decoder->dev, "ub memory decoder[%d] drain state, val=%d\n", + i, val); + if (!val) + return val; + } + + return val; +} + +static const struct ub_mem_device_ops device_ops = { + .mem_drain_start = hi_mem_drain_start, + .mem_drain_state = hi_mem_drain_state, + .mem_validate_pa = hi_mem_validate_pa, +}; + +static int hi_mem_decoder_create_one(struct ub_bus_controller *ubc, int mar_id) +{ + struct hi_ubc_private_data *data = (struct hi_ubc_private_data *)ubc->data; + struct ub_mem_decoder *decoder, *priv_data = ubc->mem_device->priv_data; + + decoder = &priv_data[mar_id]; + decoder->dev = &ubc->dev; + decoder->uent = ubc->uent; + + decoder->base_reg = ioremap(data->mem_pa_info[mar_id].decode_addr, + SZ_64); + if (!decoder->base_reg) { + dev_err(decoder->dev, "ub mem decoder base reg ioremap failed.\n"); + return -ENOMEM; + } + + return 0; +} + +static void hi_mem_decoder_remove_one(struct ub_bus_controller *ubc, int mar_id) +{ + struct ub_mem_decoder *priv_data = ubc->mem_device->priv_data; + + iounmap(priv_data[mar_id].base_reg); +} + +int hi_mem_decoder_create(struct ub_bus_controller *ubc) +{ + struct ub_mem_device *mem_device; + void *priv_data; + int ret; + + mem_device = kzalloc(sizeof(*mem_device), GFP_KERNEL); + if (!mem_device) + return -ENOMEM; + + priv_data = kcalloc(MEM_INFO_NUM, sizeof(struct ub_mem_decoder), + GFP_KERNEL); + if (!priv_data) { + kfree(mem_device); + return -ENOMEM; + } + + mem_device->dev = &ubc->dev; + mem_device->uent = ubc->uent; + mem_device->ubmem_irq_num = -1; + mem_device->ops = &device_ops; + mem_device->priv_data = priv_data; + ubc->mem_device = mem_device; + + for (int i = 0; i < MEM_INFO_NUM; i++) { + ret = hi_mem_decoder_create_one(ubc, i); + if (ret) { + dev_err(&ubc->dev, "hi mem create decoder %d failed\n", i); + for (int j = i - 1; j >= 0; j--) + hi_mem_decoder_remove_one(ubc, j); + + kfree(mem_device->priv_data); + kfree(mem_device); + ubc->mem_device = NULL; + return ret; + } + } + + return ret; +} + +void hi_mem_decoder_remove(struct ub_bus_controller *ubc) +{ + if (!ubc->mem_device) + return; + + for (int i = 0; i < MEM_INFO_NUM; i++) + hi_mem_decoder_remove_one(ubc, i); + + kfree(ubc->mem_device->priv_data); + kfree(ubc->mem_device); + 
ubc->mem_device = NULL; +} + +#define MB_SIZE_OFFSET 20 + +static bool ub_hpa_valid(u64 pa_start, u64 pa_end, u32 base_addr, u32 size) +{ + if (pa_start >= ((u64)base_addr << MB_SIZE_OFFSET) && + pa_end < (((u64)base_addr + (u64)size) << MB_SIZE_OFFSET)) + return true; + + return false; +} + +static bool hi_mem_validate_pa(struct ub_bus_controller *ubc, + u64 pa_start, u64 pa_end, bool cacheable) +{ + struct hi_ubc_private_data *data; + + if (!ubc->data) { + dev_err(&ubc->dev, "Ubc data is null.\n"); + return false; + } + + if (pa_end < pa_start) { + dev_err(&ubc->dev, "pa_start is over pa_end.\n"); + return false; + } + + data = (struct hi_ubc_private_data *)ubc->data; + for (u16 i = 0; i < MEM_INFO_NUM; i++) { + if (ub_hpa_valid(pa_start, pa_end, + data->mem_pa_info[i].cc_base_addr, + data->mem_pa_info[i].cc_base_size) && + cacheable) + return true; + + if (ub_hpa_valid(pa_start, pa_end, + data->mem_pa_info[i].nc_base_addr, + data->mem_pa_info[i].nc_base_size) && + !cacheable) + return true; + } + + dev_err(&ubc->dev, "pa_start-pa_end is invalid.\n"); + return false; +} -- Gitee From e7711627821f0ec38c7d6752e645521b67b934c7 Mon Sep 17 00:00:00 2001 From: Junlong Zheng Date: Wed, 29 Oct 2025 09:46:02 +0800 Subject: [PATCH 07/48] ub:hisi-ubus: Support for ub memory decoder ras handle commit 82b774b2b67a64127583ba5aed012dc32bee0ae7 openEuler Implement RAS exception handling for the ub memory decoder, including interrupt callback handling functions, error handling interfaces, etc. Signed-off-by: Yuhao Xiang Signed-off-by: Junlong Zheng Signed-off-by: yuhao_zhang --- drivers/ub/ubus/services/gucd.c | 8 +- drivers/ub/ubus/vendor/hisilicon/controller.c | 2 + drivers/ub/ubus/vendor/hisilicon/hisi-msg.h | 1 + drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h | 2 + drivers/ub/ubus/vendor/hisilicon/memory.c | 228 ++++++++++++++++++ .../ub/ubus/vendor/hisilicon/memory_trace.h | 46 ++++ 6 files changed, 285 insertions(+), 2 deletions(-) create mode 100644 drivers/ub/ubus/vendor/hisilicon/memory_trace.h diff --git a/drivers/ub/ubus/services/gucd.c b/drivers/ub/ubus/services/gucd.c index 35a0cf35e20a..ca5f0a3578e8 100644 --- a/drivers/ub/ubus/services/gucd.c +++ b/drivers/ub/ubus/services/gucd.c @@ -8,6 +8,7 @@ #include "../ubus.h" #include "../decoder.h" #include "../ubus_driver.h" +#include "../memory.h" #include "service.h" static const struct ub_device_id component_device_ids[] = { @@ -128,15 +129,18 @@ static void ub_setup_bus_controller(struct ub_entity *uent) return; } - if ((u32)usi_count < vec_num_max) + if ((u32)usi_count < vec_num_max) { ub_err(uent, "alloc irq vectors failed, usi count=%d, vec_num_max=%u\n", usi_count, vec_num_max); - else + } else { ub_init_decoder_usi(uent); + ub_mem_init_usi(uent); + } } static void ub_unset_bus_controller(struct ub_entity *uent) { + ub_mem_uninit_usi(uent); ub_uninit_decoder_usi(uent); ub_disable_intr(uent); ub_disable_err_msq_ctrl(uent); diff --git a/drivers/ub/ubus/vendor/hisilicon/controller.c b/drivers/ub/ubus/vendor/hisilicon/controller.c index e5f806abc0e9..d7ea5c118d32 100644 --- a/drivers/ub/ubus/vendor/hisilicon/controller.c +++ b/drivers/ub/ubus/vendor/hisilicon/controller.c @@ -19,6 +19,8 @@ static struct ub_bus_controller_ops hi_ubc_ops = { .eu_cfg = hi_eu_cfg, .mem_decoder_create = hi_mem_decoder_create, .mem_decoder_remove = hi_mem_decoder_remove, + .register_ubmem_irq = hi_register_ubmem_irq, + .unregister_ubmem_irq = hi_unregister_ubmem_irq, .register_decoder_base_addr = hi_register_decoder_base_addr, .entity_enable = 
hi_send_entity_enable_msg, }; diff --git a/drivers/ub/ubus/vendor/hisilicon/hisi-msg.h b/drivers/ub/ubus/vendor/hisilicon/hisi-msg.h index 78476423bdfb..d1a68934d174 100644 --- a/drivers/ub/ubus/vendor/hisilicon/hisi-msg.h +++ b/drivers/ub/ubus/vendor/hisilicon/hisi-msg.h @@ -59,6 +59,7 @@ enum hi_task_type { enum hi_msgq_private_opcode { EU_TABLE_CFG_CMD = 2, + GET_UBMEM_EVENT_CMD = 4 }; enum hi_msgq_user { diff --git a/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h b/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h index 119efd37b407..9aa3ba5521c1 100644 --- a/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h +++ b/drivers/ub/ubus/vendor/hisilicon/hisi-ubus.h @@ -37,6 +37,8 @@ void hi_eu_table_uninit(struct ub_bus_controller *ubc); int hi_eu_cfg(struct ub_bus_controller *ubc, bool add, u32 eid, u16 upi); int hi_mem_decoder_create(struct ub_bus_controller *ubc); void hi_mem_decoder_remove(struct ub_bus_controller *ubc); +void hi_register_ubmem_irq(struct ub_bus_controller *ubc); +void hi_unregister_ubmem_irq(struct ub_bus_controller *ubc); void hi_register_decoder_base_addr(struct ub_bus_controller *ubc, u64 *cmd_queue, u64 *event_queue); int hi_send_entity_enable_msg(struct ub_entity *uent, u8 enable); diff --git a/drivers/ub/ubus/vendor/hisilicon/memory.c b/drivers/ub/ubus/vendor/hisilicon/memory.c index 505ba7dca4fb..4d4f80f847fc 100644 --- a/drivers/ub/ubus/vendor/hisilicon/memory.c +++ b/drivers/ub/ubus/vendor/hisilicon/memory.c @@ -9,12 +9,22 @@ #include #include "../../ubus.h" +#include "../../msg.h" #include "../../memory.h" +#include "hisi-msg.h" #include "hisi-ubus.h" +#define CREATE_TRACE_POINTS +#include "memory_trace.h" #define DRAIN_ENABLE_REG_OFFSET 0x24 #define DRAIN_STATE_REG_OFFSET 0x28 +#define HI_GET_UBMEM_EVENT_REQ_SIZE 4 +#define HI_GET_UBMEM_EVENT_RSP_SIZE 772 +#define MEM_EVENT_MAX_NUM 16 +#define MAR_ERR_ADDR_COUNT 10 +#define MAR_ERR_ADDR_SIZE 2 + #define hpa_gen(addr_h, addr_l) (((u64)(addr_h) << 32) | (addr_l)) struct ub_mem_decoder { @@ -23,6 +33,28 @@ struct ub_mem_decoder { void *base_reg; }; +struct hi_ubmem_event { + u32 device_ras_status3; + u32 device_ras_status4; + u32 err_addr[MAR_ERR_ADDR_COUNT]; +}; + +struct hi_get_ubmem_event_rsp { + u32 event_num; + struct hi_ubmem_event event_info[MEM_EVENT_MAX_NUM]; +}; + +struct hi_get_ubmem_event_req { + u32 rsv0; +}; + +struct hi_get_ubmem_event_pld { + union { + struct hi_get_ubmem_event_req req; + struct hi_get_ubmem_event_rsp rsp; + }; +}; + static bool hi_mem_validate_pa(struct ub_bus_controller *ubc, u64 pa_start, u64 pa_end, bool cacheable); @@ -70,6 +102,150 @@ static const struct ub_mem_device_ops device_ops = { .mem_validate_pa = hi_mem_validate_pa, }; +static int save_ras_err_info(struct ub_mem_device *mem_device, + enum ras_err_type type, u64 hpa) +{ + struct ub_mem_ras_err_info err_info = { + .type = type, + .hpa = hpa, + }; + + if (!kfifo_put(&mem_device->ras_ctx.ras_fifo, err_info)) { + dev_err(mem_device->dev, "kfifo put failed!\n"); + return -ENOMEM; + } + + return 0; +} + +static irqreturn_t hi_mem_ras_isr(int irq, void *context) +{ + struct ub_bus_controller *ubc = (struct ub_bus_controller *)context; + struct ub_mem_ras_ctx *ras_ctx = &ubc->mem_device->ras_ctx; + struct ub_mem_ras_err_info err_info; + ubmem_ras_handler handler; + int ret; + + handler = ub_mem_ras_handler_get(); + while (kfifo_get(&ras_ctx->ras_fifo, &err_info)) { + trace_mem_ras_event(ubc->mem_device, &err_info); + pr_info("ras: type=%u\n", err_info.type); + if (handler) { + ret = handler(err_info.hpa, err_info.type); + WARN_ON(ret); 
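+			/* The event was already consumed from the fifo, so a
+			 * failing handler drops it; warn loudly instead of
+			 * losing the error silently.
+			 */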
+ } + } + + return IRQ_HANDLED; +} + +static int err_type_bitmap[] = { + /* DEVICE_RAS_STATUS_3 */ + [UB_MEM_ATOMIC_DATA_ERR] = 31, + [UB_MEM_READ_DATA_ERR] = 28, + [UB_MEM_FLOW_POISON] = 27, + [UB_MEM_FLOW_READ_AUTH_POISON] = 23, + [UB_MEM_FLOW_READ_AUTH_RESPERR] = 22, + [UB_MEM_TIMEOUT_POISON] = 21, + [UB_MEM_TIMEOUT_RESPERR] = 20, + [UB_MEM_READ_DATA_POISON] = 19, + [UB_MEM_READ_DATA_RESPERR] = 18, + /* DEVICE_RAS_STATUS_4 */ + [MAR_NOPORT_VLD_INT_ERR] = 26, + [MAR_FLUX_INT_ERR] = 25, + [MAR_WITHOUT_CXT_ERR] = 24, + [RSP_BKPRE_OVER_TIMEOUT_ERR] = 10, + /* DEVICE_RAS_STATUS_4 need save addr */ + [MAR_NEAR_AUTH_FAIL_ERR] = 21, + [MAR_FAR_AUTH_FAIL_ERR] = 22, + [MAR_TIMEOUT_ERR] = 23, + [MAR_ILLEGAL_ACCESS_ERR] = 9, + [REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR] = 11, +}; + +static int save_ras_err_info_all(struct ub_bus_controller *ubc, struct hi_ubmem_event *info) +{ + unsigned long status3_bitmap = (unsigned long)info->device_ras_status3; + unsigned long status4_bitmap = (unsigned long)info->device_ras_status4; + u32 addr_h, addr_l; + int ret = -EINVAL; + u64 hpa = 0; + int index; + int i; + + for (i = UB_MEM_ATOMIC_DATA_ERR; i <= UB_MEM_READ_DATA_RESPERR; i++) { + if (test_bit(err_type_bitmap[i], &status3_bitmap)) { + ret = save_ras_err_info(ubc->mem_device, (enum ras_err_type)i, hpa); + if (ret) + return ret; + } + } + + for (i = MAR_FLUX_INT_ERR; i <= RSP_BKPRE_OVER_TIMEOUT_ERR; i++) { + if (test_bit(err_type_bitmap[i], &status4_bitmap)) { + ret = save_ras_err_info(ubc->mem_device, (enum ras_err_type)i, hpa); + if (ret) + return ret; + } + } + + for (i = MAR_NEAR_AUTH_FAIL_ERR; i <= REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR; i++) { + if (test_bit(err_type_bitmap[i], &status4_bitmap)) { + index = MAR_ERR_ADDR_SIZE * (i - MAR_NEAR_AUTH_FAIL_ERR); + addr_h = info->err_addr[index + 1]; + addr_l = info->err_addr[index]; + hpa = hpa_gen(addr_h, addr_l); + ret = save_ras_err_info(ubc->mem_device, (enum ras_err_type)i, hpa); + if (ret) + return ret; + } + } + + /* if no_port_vld and near_auth_fail report at the same time, ignore no_port_vld */ + if (test_bit(err_type_bitmap[MAR_NOPORT_VLD_INT_ERR], &status4_bitmap) && + !test_bit(err_type_bitmap[MAR_NEAR_AUTH_FAIL_ERR], &status4_bitmap)) { + i = MAR_NOPORT_VLD_INT_ERR; + ret = save_ras_err_info(ubc->mem_device, (enum ras_err_type)i, hpa); + } + + return ret; +} + +static irqreturn_t hi_mem_ras_irq(int irq, void *context) +{ + struct ub_bus_controller *ubc = (struct ub_bus_controller *)context; + struct hi_get_ubmem_event_pld pld = {}; + struct msg_info info = {}; + u32 event_cnt; + int ret; + + message_info_init(&info, ubc->uent, &pld, &pld, + (HI_GET_UBMEM_EVENT_REQ_SIZE << MSG_REQ_SIZE_OFFSET) | + HI_GET_UBMEM_EVENT_RSP_SIZE); + ret = hi_message_private(ubc->mdev, &info, GET_UBMEM_EVENT_CMD); + if (ret) { + dev_err(&ubc->dev, "get ubmem event failed, ret=%d\n", + ret); + return IRQ_HANDLED; + } + + event_cnt = pld.rsp.event_num; + if (event_cnt == 0 || event_cnt > MEM_EVENT_MAX_NUM) { + dev_err(&ubc->dev, "event_cnt [%u] is invalid\n", event_cnt); + return IRQ_HANDLED; + } + + for (u32 i = 0; i < event_cnt; i++) { + ret = save_ras_err_info_all(ubc, &pld.rsp.event_info[i]); + if (ret == -EINVAL) { + dev_err(&ubc->dev, "save_ras_err_info failed, ret=%d\n", ret); + return IRQ_HANDLED; + } + } + + return IRQ_WAKE_THREAD; +} + static int hi_mem_decoder_create_one(struct ub_bus_controller *ubc, int mar_id) { struct hi_ubc_private_data *data = (struct hi_ubc_private_data *)ubc->data; @@ -150,6 +326,58 @@ void hi_mem_decoder_remove(struct 
ub_bus_controller *ubc) ubc->mem_device = NULL; } +void hi_register_ubmem_irq(struct ub_bus_controller *ubc) +{ + struct ub_entity *uent = ubc->uent; + int irq_num, ret; + u32 usi_idx; + + if (!ubc->mem_device) { + pr_err("mem device is NULL!\n"); + return; + } + + ret = ub_cfg_read_dword(uent, UB_MEM_USI_IDX, &usi_idx); + if (ret) { + ub_err(uent, "get ubmem usi idx failed, ret=%d\n", ret); + return; + } + + irq_num = ub_irq_vector(uent, usi_idx); + if (irq_num < 0) { + ub_err(uent, "ub get irq vector failed, irq num=%d\n", irq_num); + return; + } + + INIT_KFIFO(ubc->mem_device->ras_ctx.ras_fifo); + + ret = request_threaded_irq(irq_num, hi_mem_ras_irq, + hi_mem_ras_isr, IRQF_SHARED, + "ub_mem_event", ubc); + if (ret) { + ub_err(uent, "ubmem request_irq failed, ret=%d\n", ret); + return; + } + + ubc->mem_device->ubmem_irq_num = irq_num; +} + +void hi_unregister_ubmem_irq(struct ub_bus_controller *ubc) +{ + int irq_num; + + if (!ubc->mem_device) { + dev_err(&ubc->dev, "mem device is NULL!\n"); + return; + } + + irq_num = ubc->mem_device->ubmem_irq_num; + if (irq_num < 0) + return; + + free_irq((unsigned int)irq_num, (void *)ubc); +} + #define MB_SIZE_OFFSET 20 static bool ub_hpa_valid(u64 pa_start, u64 pa_end, u32 base_addr, u32 size) diff --git a/drivers/ub/ubus/vendor/hisilicon/memory_trace.h b/drivers/ub/ubus/vendor/hisilicon/memory_trace.h new file mode 100644 index 000000000000..9204d94aa6b8 --- /dev/null +++ b/drivers/ub/ubus/vendor/hisilicon/memory_trace.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) HiSilicon Technologies Co., Ltd. 2025. All rights reserved. + */ + +/* This must be outside ifdef __HISI_MEMORY_TRACE_H__ */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ub_memory + +#if !defined(__HISI_MEMORY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __HISI_MEMORY_TRACE_H__ + +#include + +TRACE_EVENT(mem_ras_event, + TP_PROTO(struct ub_mem_device *device, struct ub_mem_ras_err_info *info), + TP_ARGS(device, info), + + TP_STRUCT__entry( + __field(u32, eid) + __field(u32, cna) + __field(u8, type) + __field(u64, hpa) + ), + + TP_fast_assign( + __entry->eid = device->uent->eid; + __entry->cna = device->uent->cna; + __entry->type = (u8)info->type; + __entry->hpa = info->hpa; + ), + + TP_printk( + "%u-%u-%u-%llu", __entry->eid, __entry->cna, + __entry->type, __entry->hpa + ) +); + +#endif /* __HISI_MEMORY_TRACE_H__ */ + +/* This must be outside ifdef __HISI_MEMORY_TRACE_H__ */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/ub/ubus/vendor/hisilicon +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE memory_trace +#include -- Gitee From 4db810eabf69955bfea4947b95fcb9d341cdd423 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:38 +0800 Subject: [PATCH 08/48] obmm: Add Ownership Based Memory Management framework commit b2b4a358c0963806835f9a6ff96c2e34d1543216 openEuler OBMM (Ownership Based Memory Management) is a core component for managing remote memory within a single machine. It enables exporting local memory and importing memory exported by other systems. After the OBMM components on both export and import sides configured, applications on the import side can access memory from the export side using standard load/store instructions just like accessing local memory. Key features of OBMM include: - Hardware Enablement: Configures hardware paths to allow load/store instructions to execute across hosts physically. 
- Software Enablement: Creates user-friendly software interfaces for remote memory, including integration with Linux memory management or direct mapping to user physical address space using pfnmap. - NUMA Integration: Properly integrates with the Linux NUMA subsystem to manage remote memory as part of the system's memory hierarchy. - Cross-Supernode Cache Consistency: Provides mechanisms to maintain cache consistency across different system nodes. This patch series introduces the core framework and lays the foundation for remote memory management capabilities. Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- arch/arm64/configs/tencent.config | 2 ++ drivers/ub/Kconfig | 2 +- drivers/ub/Makefile | 1 + drivers/ub/obmm/Kconfig | 16 ++++++++++++++++ drivers/ub/obmm/Makefile | 6 ++++++ drivers/ub/obmm/obmm_core.c | 22 ++++++++++++++++++++++ 6 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 drivers/ub/obmm/Kconfig create mode 100644 drivers/ub/obmm/Makefile create mode 100644 drivers/ub/obmm/obmm_core.c diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config index 05f77ddfcb27..32a112ea7b4e 100644 --- a/arch/arm64/configs/tencent.config +++ b/arch/arm64/configs/tencent.config @@ -1844,6 +1844,8 @@ CONFIG_UB_UBMEM_UMMU=y CONFIG_UB_UMMU_BYPASSDEV=y # end of UMMU +CONFIG_OBMM=m + # URMA CONFIG_UB_URMA=m diff --git a/drivers/ub/Kconfig b/drivers/ub/Kconfig index 946067487c42..9321fefab35d 100644 --- a/drivers/ub/Kconfig +++ b/drivers/ub/Kconfig @@ -16,7 +16,7 @@ if UB source "drivers/ub/ubus/Kconfig" source "drivers/ub/ubfi/Kconfig" source "drivers/ub/ubase/Kconfig" - +source "drivers/ub/obmm/Kconfig" config UB_URMA tristate "Unified Bus (UB) urma support" default m diff --git a/drivers/ub/Makefile b/drivers/ub/Makefile index c1fd5627ca9b..a28b0c0e7ce5 100644 --- a/drivers/ub/Makefile +++ b/drivers/ub/Makefile @@ -4,3 +4,4 @@ obj-y += ubus/ obj-y += ubfi/ obj-$(CONFIG_UB_URMA) += urma/ obj-$(CONFIG_UB_UBASE) += ubase/ +obj-y += obmm/ diff --git a/drivers/ub/obmm/Kconfig b/drivers/ub/obmm/Kconfig new file mode 100644 index 000000000000..85d2eff555e1 --- /dev/null +++ b/drivers/ub/obmm/Kconfig @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0-only +config OBMM + default n + tristate "OBMM(ownership based memory management) Support" + depends on UB_UMMU_CORE && UB_UBUS && HISI_SOC_CACHE + select NUMA_REMOTE + select PFN_RANGE_ALLOC + select RECLAIM_NOTIFY + help + OBMM (Ownership Based Memory Management) provides a framework for + managing shared memory regions across multiple systems. + It supports both memory import (accessing remote memory) and export + (making local memory visible across systems) operations with proper + NUMA integration and provides capability of cross-supernode cache + consistency maintenance. + If unsure, say N. \ No newline at end of file diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile new file mode 100644 index 000000000000..78cc6fecf5f2 --- /dev/null +++ b/drivers/ub/obmm/Makefile @@ -0,0 +1,6 @@ + +# SPDX-License-Identifier: GPL-2.0+ + +obmm-y := obmm_core.o + +obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c new file mode 100644 index 000000000000..cdc7dc8ef3b9 --- /dev/null +++ b/drivers/ub/obmm/obmm_core.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. 
+ */
+
+#include 
+static int __init obmm_init(void)
+{
+	return 0;
+}
+
+static void __exit obmm_exit(void)
+{
+}
+
+module_init(obmm_init);
+module_exit(obmm_exit);
+
+MODULE_DESCRIPTION("OBMM Framework's implementations.");
+MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
+MODULE_LICENSE("GPL");
--
Gitee

From d9c8f020a9e261fca6e0835364f3ac1227ae254f Mon Sep 17 00:00:00 2001
From: Li Ruilin
Date: Fri, 7 Nov 2025 17:45:39 +0800
Subject: [PATCH 09/48] obmm: Add region definitions and core data structures

commit 051309d59dfc17ff176e3825998151af58c4c9e9 openEuler

Add core data structures and definitions for OBMM memory regions.
Includes:
1. Define /dev/obmm device as the main interface for OBMM driver and
user-space program interaction
2. Design and implement core data structure obmm_region to manage each
segment of export/import memory
3. Implement obmm_query interface (via OBMM_CMD_ADDR_QUERY ioctl)
supporting:
   - Query by physical address
   - Query physical address by mem_id and offset
4. Provide complete region lifecycle management functions:
   - init_obmm_region/uninit_obmm_region: initialization and cleanup
   - register_obmm_region/deregister_obmm_region: registration and removal
   - search_get_obmm_region/put_obmm_region: reference counting management
5. Implement a series of helper functions and validation interfaces:
   - validate_obmm_mem_id: verify memory ID validity
   - validate_scna: verify SCNA validity
   - nodes_on_same_package: check if nodes are on the same package
   - set_obmm_region_priv: set region private data

These features lay the foundation for subsequent memory region creation,
management, and sharing operations.

Signed-off-by: Li Ruilin
Signed-off-by: Gao Chao
Signed-off-by: yuhao_zhang
---
 drivers/ub/obmm/obmm_core.c | 412 +++++++++++++++++++++++++++++++++++-
 drivers/ub/obmm/obmm_core.h | 175 +++++++++++++++
 include/uapi/ub/obmm.h | 41 ++++
 3 files changed, 627 insertions(+), 1 deletion(-)
 create mode 100644 drivers/ub/obmm/obmm_core.h
 create mode 100644 include/uapi/ub/obmm.h

diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c
index cdc7dc8ef3b9..f6cff30b0cc3 100644
--- a/drivers/ub/obmm/obmm_core.c
+++ b/drivers/ub/obmm/obmm_core.c
@@ -4,14 +4,424 @@
  * Description:OBMM Framework's implementations.
  */
 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 #include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "obmm_core.h"
+
+size_t __obmm_memseg_size;
+
+/*
+ * OBMM centers around regions -- "struct obmm_region". Each region represents
+ * a chunk of memory. OBMM exposes its interface to user space through the
+ * device interface. Users may manipulate the memory region through ioctl to
+ * master device /dev/obmm, and access each memory region through standard file
+ * operations like open, close and mmap.
+ *
+ * To support remote memory access via UB, OBMM models two different types of
+ * regions, the export region and the import region. As the name suggests, the
+ * export region is physically located on this host (local), while the import
+ * region is physically attached to another host (remote).
+ *
+ * All /dev/obmm operations are essentially region creation and deletion.
+ * Currently, a linked list is used to keep track of all active regions.
+ *
+ * All region device (/dev/obmm_shmdev{region_id}) operations access its own
+ * region only. To keep our management in accordance with Linux standard device
+ * file, each device file's life cycle should be decided only by its reference
+ * counts. 
Therefore, the master device cannot forcefully remove a region in + * use. This complicates concurrency control and region life cycle management. + * + * concurrency control: when region is created, the only accessor to the region + * is its creator, and there is no concurrency issues to worry about. The + * concurrent access starts when we "publish" the region on the region list. + * + * All new accessors get the pointer to the region from the region list, + * directly or indirectly. Most accessors merely read some region attributes. + * Their read-only nature simplifies concurrency control, and all we need to do + * is to guarantee that the region will not be freed by others during their + * access. This is done by the "refcnt" reference counter. Using the conditional + * atomic instructions, "refcnt" is also in charge of guarding against access + * before initialization is completed, access during destruction and double-free + * problems. + */ + +static struct obmm_ctx_info g_obmm_ctx_info; +static DEFINE_IDA(g_obmm_region_ida); + +/* Return the pointer to region only if the region is active: not in initialization or + * destruction process. + */ +struct obmm_region *try_get_obmm_region(struct obmm_region *region) +{ + if (region && refcount_inc_not_zero(®ion->refcnt)) + return region; + return NULL; +} +void put_obmm_region(struct obmm_region *region) +{ + if (region) + refcount_dec(®ion->refcnt); +} +void activate_obmm_region(struct obmm_region *region) +{ + refcount_set(®ion->refcnt, 1); +} +/* Return whether the disable is success. disable succeed only when the region is active and idle */ +static inline bool disable_obmm_region_get(struct obmm_region *region) +{ + return refcount_dec_if_one(®ion->refcnt); +} + +static struct obmm_region *_search_obmm_region(int regionid) +{ + struct obmm_region *region_now; + + list_for_each_entry(region_now, &g_obmm_ctx_info.regions, node) { + if (region_now->regionid == regionid) + return region_now; + } + return NULL; +} + +struct obmm_region *search_get_obmm_region(int regionid) +{ + struct obmm_region *region; + unsigned long flags; + spinlock_t *lock; + + lock = &g_obmm_ctx_info.lock; + spin_lock_irqsave(lock, flags); + region = _search_obmm_region(regionid); + region = try_get_obmm_region(region); + spin_unlock_irqrestore(lock, flags); + + return region; +} + +struct obmm_region *search_deactivate_obmm_region(int regionid) +{ + struct obmm_region *region; + unsigned long flags; + spinlock_t *lock; + bool success; + + lock = &g_obmm_ctx_info.lock; + spin_lock_irqsave(lock, flags); + region = _search_obmm_region(regionid); + success = region && disable_obmm_region_get(region); + spin_unlock_irqrestore(lock, flags); + + if (!region) { + pr_err("failed to deactivate: region with mem_id=%d not found.\n", regionid); + return ERR_PTR(-ENOENT); + } + + if (!success) { + pr_err("failed to deactivate: region %d is being used or in creation/destruction process.\n", + region->regionid); + return ERR_PTR(-EBUSY); + } + + return region; +} + +int obmm_query_by_offset(struct obmm_region *reg, unsigned long offset, + struct obmm_ext_addr *ext_addr) +{ + return -ENOTTY; +} + +int obmm_query_by_pa(unsigned long pa, struct obmm_ext_addr *ext_addr) +{ + return -ENOTTY; +} + +static int nid_to_package_id(int nid) +{ + const struct cpumask *cpumask; + int cpu; + + /* the check guard against the dynamic online / offline of local node */ + if (!is_online_local_node(nid)) + return -1; + + /* currently we cannot handle CPU-less local memory node */ + cpumask = 
cpumask_of_node(nid); + if (cpumask_empty(cpumask)) + return -1; + + cpu = (int)cpumask_first(cpumask); + return topology_physical_package_id(cpu); +} + +/* return -1 when any of the node is not online or is in different packages (sockets) */ +static int get_nodes_package(const nodemask_t *nodes) +{ + int nid, package_id, this_package_id; + + package_id = -1; + for_each_node_mask(nid, *nodes) { + this_package_id = nid_to_package_id(nid); + if (this_package_id == -1) + return -1; + if (package_id == -1) + package_id = this_package_id; + else if (package_id != this_package_id) + return -1; + } + return package_id; +} + +bool nodes_on_same_package(const nodemask_t *nodes) +{ + return get_nodes_package(nodes) != -1; +} + +bool validate_scna(u32 scna) +{ + int ret = ub_mem_get_numa_id(scna); + + if (ret < 0) { + pr_err("%#x is not a known scna, lookup ret=%pe", scna, ERR_PTR(ret)); + return false; + } + return true; +} + +bool validate_obmm_mem_id(__u64 mem_id) +{ + bool valid; + + valid = mem_id >= OBMM_MIN_VALID_REGIONID && mem_id <= OBMM_MAX_VALID_REGIONID; + if (!valid) + pr_err("mem_id=%llu is out of valid mem_id range.\n", mem_id); + return valid; +} + +static int insert_obmm_region(struct obmm_region *reg) +{ + struct obmm_region *region_now; + unsigned long flags; + spinlock_t *lock; + + lock = &g_obmm_ctx_info.lock; + spin_lock_irqsave(lock, flags); + + region_now = _search_obmm_region(reg->regionid); + if (region_now != NULL) { + spin_unlock_irqrestore(lock, flags); + pr_err("obmm region already exist, mem_id = %d\n", reg->regionid); + return -EEXIST; + } + + list_add(®->node, &g_obmm_ctx_info.regions); + spin_unlock_irqrestore(lock, flags); + return 0; +} + +static void remove_obmm_region(struct obmm_region *reg) +{ + unsigned long flags; + spinlock_t *lock; + + lock = &g_obmm_ctx_info.lock; + + spin_lock_irqsave(lock, flags); + + list_del(®->node); + + spin_unlock_irqrestore(lock, flags); +} + +void uninit_obmm_region(struct obmm_region *region) +{ + ida_free(&g_obmm_region_ida, region->regionid); +} + +int init_obmm_region(struct obmm_region *region) +{ + int retval; + + refcount_set(®ion->refcnt, 0); + INIT_LIST_HEAD(®ion->node); + + retval = ida_alloc_range(&g_obmm_region_ida, OBMM_MIN_VALID_REGIONID, + OBMM_MAX_VALID_REGIONID, GFP_KERNEL); + if (retval < 0) { + pr_err("Failed to allocate mem_id, ret=%pe\n", ERR_PTR(retval)); + return retval; + } + region->regionid = retval; + + return 0; +} + +int register_obmm_region(struct obmm_region *region) +{ + int retval; + + /* insert OBMM_region */ + retval = insert_obmm_region(region); + if (retval < 0) { + pr_err("Failed to insert obmm region %d on creation. 
ret=%pe\n", region->regionid, + ERR_PTR(retval)); + return retval; + } + + return 0; +} + +void deregister_obmm_region(struct obmm_region *region) +{ + remove_obmm_region(region); +} + +int set_obmm_region_priv(struct obmm_region *region, unsigned int priv_len, const void __user *priv) +{ + region->priv_len = 0; + if (priv_len > OBMM_MAX_PRIV_LEN) { + pr_err("priv_len=%u too large (limit=%u).\n", priv_len, OBMM_MAX_PRIV_LEN); + return -EINVAL; + } + + if (copy_from_user(region->priv, priv, priv_len)) { + pr_err("failed to save private data.\n"); + return -EFAULT; + } + region->priv_len = priv_len; + return 0; +} + +static int obmm_addr_query(struct obmm_cmd_addr_query *cmd_addr_query) +{ + int ret; + struct obmm_ext_addr ext_addr; + struct obmm_region *region; + + if (cmd_addr_query->key_type == OBMM_QUERY_BY_PA) { + pr_debug("obmm_query_by_pa: pa=%#llx\n", cmd_addr_query->pa); + ret = obmm_query_by_pa(cmd_addr_query->pa, &ext_addr); + if (ret == 0) { + cmd_addr_query->mem_id = ext_addr.regionid; + cmd_addr_query->offset = ext_addr.offset; + } + return ret; + } else if (cmd_addr_query->key_type == OBMM_QUERY_BY_ID_OFFSET) { + pr_debug("obmm_query_by_id_offset: mem_id=%llu offset=%#llx\n", + cmd_addr_query->mem_id, cmd_addr_query->offset); + if (!validate_obmm_mem_id(cmd_addr_query->mem_id)) + return -ENOENT; + region = search_get_obmm_region(cmd_addr_query->mem_id); + if (region == NULL) { + pr_err("region %llu not found.\n", cmd_addr_query->mem_id); + return -ENOENT; + } + ret = obmm_query_by_offset(region, cmd_addr_query->offset, &ext_addr); + if (ret == 0) + cmd_addr_query->pa = ext_addr.pa; + put_obmm_region(region); + return ret; + } + pr_err("invalid query key type: %u.\n", cmd_addr_query->key_type); + return -EINVAL; +} + +static int obmm_dev_open(struct inode *inode __always_unused, struct file *file __always_unused) { return 0; } +static int obmm_dev_flush(struct file *file __always_unused, fl_owner_t owner __always_unused) +{ + return 0; +} + +static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, unsigned long arg) +{ + int ret; + union { + struct obmm_cmd_addr_query query; + } cmd_param; + + switch (cmd) { + case OBMM_CMD_ADDR_QUERY: { + ret = (int)copy_from_user(&cmd_param.query, (void __user *)arg, + sizeof(struct obmm_cmd_addr_query)); + if (ret) { + pr_err("failed to load addr_query argument"); + return -EFAULT; + } + + ret = obmm_addr_query(&cmd_param.query); + if (ret) + return ret; + + ret = (int)copy_to_user((void __user *)arg, &cmd_param.query, + sizeof(struct obmm_cmd_addr_query)); + if (ret) { + pr_err("failed to write obmm_query result"); + return -EFAULT; + } + } break; + default: + ret = -ENOTTY; + } + + return ret; +} + +const struct file_operations obmm_dev_fops = { .owner = THIS_MODULE, + .unlocked_ioctl = obmm_dev_ioctl, + .open = obmm_dev_open, + .flush = obmm_dev_flush }; + +static struct miscdevice obmm_dev_handle = { .minor = MISC_DYNAMIC_MINOR, + .name = OBMM_DEV_NAME, + .fops = &obmm_dev_fops }; + +static int __init obmm_init(void) +{ + int ret; + + pr_info("obmm_module: init started\n"); + + ret = misc_register(&obmm_dev_handle); + if (ret) { + pr_err("Failed to register root device. 
ret=%pe\n", ERR_PTR(ret)); + goto out_allocator_exit; + } + + spin_lock_init(&g_obmm_ctx_info.lock); + INIT_LIST_HEAD(&g_obmm_ctx_info.regions); + + pr_info("obmm_module: init completed\n"); + return ret; + +out_allocator_exit: + return ret; +} + static void __exit obmm_exit(void) { + pr_info("obmm_module: exit started\n"); + + misc_deregister(&obmm_dev_handle); + + pr_info("obmm_module: exit completed\n"); } module_init(obmm_init); diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h new file mode 100644 index 000000000000..edff403e3ef7 --- /dev/null +++ b/drivers/ub/obmm/obmm_core.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + * Author: + */ + +#ifndef OBMM_CORE_H +#define OBMM_CORE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OBMM_DEV_NAME "obmm" + +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) "OBMM: " fmt + +#define EID_FMT64 "%#llx:%#llx" +#define EID_ALIGNED_FMT64 "%#0*llx:%#0*llx" + +#define EID_ARGS64_H(eid) (*(u64 *)&(eid)[8]) +#define EID_ARGS64_L(eid) (*(u64 *)&(eid)[0]) + +extern size_t __obmm_memseg_size; +#define OBMM_MEMSEG_SIZE __obmm_memseg_size +/* + * The maximum of {OBMM_MEMSEG_SIZE, PAGE_SIZE and CACHE_MAINTAIN_GRANU}. + */ +#define OBMM_BASIC_GRANU PMD_SIZE + +#define MAX_MEMINFO_COUNT MAX_IMPORT_COUNT + +enum obmm_region_type { + OBMM_EXPORT_REGION, +}; + +#define OBMM_REGION_FLAG_NUMA_REMOTE 0x1 +#define OBMM_REGION_FLAG_ALLOW_MMAP 0x2 +#define OBMM_REGION_FLAG_MEMORY_FROM_USER 0x4 +#define OBMM_REGION_FLAG_FAST_ALLOC 0x8 + +#define OBMM_INVALID_REGIONID 0 +#define OBMM_MIN_VALID_REGIONID 1 +#define OBMM_MAX_VALID_REGIONID MINORMASK +#define OBMM_REGIONID_MAX_COUNT (OBMM_MAX_VALID_REGIONID - OBMM_MIN_VALID_REGIONID + 1) + +/* invalidate cache **on start-up** */ +/* region models a set of memory to share across hosts: a unit of sharing. */ +struct obmm_region { + /* unique within host -- can be used as an access handle */ + int regionid; + + enum obmm_region_type type; + + unsigned long flags; + + refcount_t refcnt; + + /* the total size of all memory segments included in meminfo */ + u64 mem_size; + + /* regions are chained into a list for management */ + struct list_head node; + + unsigned int priv_len; + unsigned char priv[OBMM_MAX_PRIV_LEN]; +}; + +struct obmm_ctx_info { + /* active */ + struct list_head regions; + spinlock_t lock; +}; + +void activate_obmm_region(struct obmm_region *region); +struct obmm_region *try_get_obmm_region(struct obmm_region *region); +/* Return a valid pointer or a NULL pointer. */ +struct obmm_region *search_get_obmm_region(int regionid); +/* Return a valid pointer or an error pointer, which will never be null. */ +struct obmm_region *search_deactivate_obmm_region(int regionid); +void put_obmm_region(struct obmm_region *region); + +/* Extended information of a byte address */ +struct obmm_ext_addr { + /* OBMM related */ + enum obmm_region_type region_type; + u32 regionid; + u64 offset; + + /* UB bus related */ + u32 tid; + u64 uba; + + /* host machine related */ + s32 numa_id; + u64 pa; +}; + +/* + * Get the extended OBMM information from a PA + * + * @pa: physical address to query + * @filter: the type of regions to check against + * @ext_addr: (output) the extended information related to the @pa + * + * Return 0 on success, negative value on failure (region not found). 
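+ *
+ * A minimal usage sketch (illustrative only):
+ *
+ *	struct obmm_ext_addr ext;
+ *
+ *	if (obmm_query_by_pa(pa, &ext) == 0)
+ *		pr_debug("region %u, offset %#llx\n", ext.regionid, ext.offset);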
+ */ +int obmm_query_by_pa(unsigned long pa, struct obmm_ext_addr *ext_addr); +/* + * Get the extended OBMM address information of a region by offset + * + * @reg: the region to query; its refcount must be held before calling this + * function + * @offset: the offset within the region (UBA offset) + * @ext_addr: (output) the extended information related to @reg and @offset + * + * Return 0 on success, negative value on failure (region not found). + */ +int obmm_query_by_offset(struct obmm_region *reg, unsigned long offset, + struct obmm_ext_addr *ext_addr); + +bool nodes_on_same_package(const nodemask_t *nodes); + +/* return true if scna is a registered primary CNA of a bus controller. */ +bool validate_scna(u32 scna); +/* return true if the @mem_id is within valid range. It does not guarantee that the @mem_id is + * associated with a present region. Use search_get_obmm_region if one wants to make sure that the + * @mem_id is backed by an actual memdev. + */ +bool validate_obmm_mem_id(__u64 mem_id); + +/* internal helpers */ +static inline bool is_online_local_node(int node) +{ + return node_online(node) && !numa_is_remote_node(node); +} +static inline int __maybe_unused next_online_local_node(int node) +{ + do { + node = (int)next_node(node, node_online_map); + } while (node < MAX_NUMNODES && numa_is_remote_node(node)); + return node; +} +static inline int __maybe_unused first_online_local_node(void) +{ + int node = (int)first_node(node_online_map); + + while (node < MAX_NUMNODES && numa_is_remote_node(node)) + node = (int)next_node(node, node_online_map); + return node; +} +#define for_each_online_local_node(node) \ + for ((node) = first_online_local_node(); (node) < MAX_NUMNODES; \ + (node) = next_online_local_node(node)) + +int set_obmm_region_priv(struct obmm_region *region, unsigned int priv_len, + const void __user *priv); + +int init_obmm_region(struct obmm_region *region); +void uninit_obmm_region(struct obmm_region *region); + +int register_obmm_region(struct obmm_region *region); +void deregister_obmm_region(struct obmm_region *region); + +#endif diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h new file mode 100644 index 000000000000..98825d53390b --- /dev/null +++ b/include/uapi/ub/obmm.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
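+ * Description: OBMM user-space ABI. Commands are issued via ioctl() on the
+ * OBMM misc device (assumed to be exposed as /dev/obmm), for example:
+ *
+ *	struct obmm_cmd_addr_query q = { .key_type = OBMM_QUERY_BY_PA, .pa = pa };
+ *
+ *	if (ioctl(fd, OBMM_CMD_ADDR_QUERY, &q) == 0)
+ *		printf("mem_id=%llu offset=%#llx\n", q.mem_id, q.offset);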
+ */ + +#ifndef UAPI_OBMM_H +#define UAPI_OBMM_H + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + + +#define OBMM_MAX_LOCAL_NUMA_NODES 16 +#define MAX_NUMA_DIST 254 +#define OBMM_MAX_PRIV_LEN 512 +#define OBMM_MAX_VENDOR_LEN 128 + +enum obmm_query_key_type { + OBMM_QUERY_BY_PA, + OBMM_QUERY_BY_ID_OFFSET +}; + +struct obmm_cmd_addr_query { + /* key type decides the input and output */ + enum obmm_query_key_type key_type; + __u64 mem_id; + __u64 offset; + __u64 pa; +} __attribute__((aligned(8))); + + +#define OBMM_CMD_ADDR_QUERY _IOWR('x', 4, struct obmm_cmd_addr_query) + +#if defined(__cplusplus) +} +#endif + +#endif /* UAPI_OBMM_H */ -- Gitee From bc8c5639816d6ba1f86c25058815da3786545649 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:39 +0800 Subject: [PATCH 10/48] obmm: Add cache maintenance helpers for inter-node consistency commit 051360a4224a10ce6a7db65507e5b0d3b13deed4 openEuler Provide a series of helpers for cache flushing, TLB invalidation, page table attribute modification, and hardware register queue draining, which are used for subsequent inter-supernode consistency maintenance. Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 3 +- drivers/ub/obmm/obmm_cache.c | 198 +++++++++++++++++++++++++++++++++++ drivers/ub/obmm/obmm_cache.h | 28 +++++ drivers/ub/obmm/obmm_core.c | 1 + drivers/ub/obmm/obmm_core.h | 2 + include/uapi/ub/obmm.h | 15 +++ 6 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 drivers/ub/obmm/obmm_cache.c create mode 100644 drivers/ub/obmm/obmm_cache.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 78cc6fecf5f2..b8fb20d219fb 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ -obmm-y := obmm_core.o +obmm-y := obmm_core.o \ + obmm_cache.o obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/obmm_cache.c b/drivers/ub/obmm/obmm_cache.c new file mode 100644 index 000000000000..5942f2f16e2a --- /dev/null +++ b/drivers/ub/obmm/obmm_cache.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2025. All rights reserved. 
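+ * Description: cache maintenance helpers for inter-node consistency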
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "obmm_core.h" +#include "obmm_cache.h" + +static bool skip_cache_maintain; +module_param(skip_cache_maintain, bool, 0444); +MODULE_PARM_DESC(skip_cache_maintain, + "Whether to skip cache maintain operation (to suppress errors in simulations)."); + +static bool is_valid_cache_ops(unsigned long cache_ops) +{ + return cache_ops == OBMM_SHM_CACHE_NONE || cache_ops == OBMM_SHM_CACHE_INVAL || + cache_ops == OBMM_SHM_CACHE_WB_ONLY || cache_ops == OBMM_SHM_CACHE_WB_INVAL; +} + +#define UB_MEM_DRAIN_TMOUT_MSEC 1000 + +int ub_write_queue_flush(uint32_t scna) +{ + unsigned long ub_mem_drain_timeout = jiffies + msecs_to_jiffies(UB_MEM_DRAIN_TMOUT_MSEC); + + pr_debug("call external: ub_mem_drain(scna=%#x)\n", scna); + + ub_mem_drain_start(scna); + while (!ub_mem_drain_state(scna)) { + if (time_after(jiffies, ub_mem_drain_timeout)) { + pr_err("ub_mem_drain not completed within %d msecs\n", + UB_MEM_DRAIN_TMOUT_MSEC); + return -ETIMEDOUT; + } + cpu_relax(); + } + + pr_debug("external called: ub_mem_drain\n"); + return 0; +} + +#define MAX_FLUSH_SIZE (1UL << 30) +/* the flush_cache_by_pa will yield CPU */ +#define MAX_RESCHED_ROUND 10 +#define CACHE_FLUSH_RETRY_MS 10 +int flush_cache_by_pa(phys_addr_t addr, size_t size, unsigned long cache_ops) +{ + static DEFINE_SEMAPHORE(sem, 1); + static const enum hisi_soc_cache_maint_type hisi_maint_type[] = { + /* OBMM_SHM_CACHE_NONE does not have a maintenance type */ + [OBMM_SHM_CACHE_NONE] = HISI_CACHE_MAINT_MAX, + [OBMM_SHM_CACHE_INVAL] = HISI_CACHE_MAINT_MAKEINVALID, + [OBMM_SHM_CACHE_WB_INVAL] = HISI_CACHE_MAINT_CLEANINVALID, + [OBMM_SHM_CACHE_WB_ONLY] = HISI_CACHE_MAINT_CLEANSHARED, + }; + + phys_addr_t curr_addr = addr; + size_t remain_size = size; + int ret = 0, round_to_resched = MAX_RESCHED_ROUND; + enum hisi_soc_cache_maint_type maint_type = hisi_maint_type[cache_ops]; + + if (skip_cache_maintain) { + pr_debug_ratelimited("cache maintenance request {addr=%pa, size=%#zx, cache_ops=%lu}.\n", + &addr, size, cache_ops); + return 0; + } + + if (!is_valid_cache_ops(cache_ops)) { + pr_err("invalid cache_ops %lu.\n", cache_ops); + return -EINVAL; + } + + down(&sem); + while (remain_size != 0) { + size_t flush_size; + + flush_size = remain_size <= MAX_FLUSH_SIZE ? remain_size : MAX_FLUSH_SIZE; + + /* retry if there is contention over hardware */ + while (true) { + pr_debug("call external: hisi_soc_cache_maintain(0x%llx, 0x%zx, %u)\n", + curr_addr, flush_size, maint_type); + ret = hisi_soc_cache_maintain(curr_addr, flush_size, maint_type); + pr_debug("external called: hisi_soc_cache_maintain(), ret=%pe\n", + ERR_PTR(ret)); + + if (ret != -EBUSY) + break; + pr_warn_once("Racing access of cache flushing hardware identified. 
The performance of UB memory may significantly degrade.\n"); + msleep(CACHE_FLUSH_RETRY_MS); + } + if (ret) + break; + + curr_addr += flush_size; + remain_size -= flush_size; + if (--round_to_resched == 0) { + cond_resched(); + round_to_resched = MAX_RESCHED_ROUND; + } + } + up(&sem); + + if (remain_size != 0) + pr_warn("%s: 0x%zx@0x%llx not flushed due to unexpected error; ret=%pe.\n", + __func__, remain_size, curr_addr, ERR_PTR(ret)); + + return ret; +} + +int obmm_region_flush_range(struct obmm_region *reg, unsigned long offset, unsigned long length, + uint8_t cache_ops) +{ + return -ENOTTY; +} + +/* flush the entire process address space */ +void obmm_flush_tlb(struct mm_struct *mm) +{ + unsigned long asid; + + dsb(ishst); + asid = __TLBI_VADDR(0, ASID(mm)); + __tlbi(aside1is, asid); + __tlbi_user(aside1is, asid); + dsb(ish); +} + +struct modify_info { + int pmd_cnt; + int pte_cnt; + int pmd_leaf_cnt; + int hugetlb_cnt; + bool cacheable; +}; + +static int modify_hugetlb_prot(pte_t *pte, unsigned long hmask __always_unused, + unsigned long addr __always_unused, + unsigned long next __always_unused, struct mm_walk *walk) +{ + struct modify_info *info = (struct modify_info *)walk->private; + bool cacheable = info->cacheable; + struct vm_area_struct *vma = walk->vma; + spinlock_t *ptl; + pgprot_t prot; + pte_t entry; + + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); + entry = ptep_get(pte); + if (unlikely(!pte_present(entry))) { + pr_warn("%s: addr = 0x%lx, pte not present\n", __func__, addr); + spin_unlock(ptl); + return 0; + } + + info->hugetlb_cnt++; + + prot = cacheable ? pgprot_tagged(pte_pgprot(entry)) : + pgprot_writecombine(pte_pgprot(entry)); + entry = pte_modify(entry, prot); + __set_pte(pte, entry); + + spin_unlock(ptl); + return 0; +} + +int modify_pgtable_prot(struct mm_struct *mm, void *va, size_t size, bool cacheable) +{ + struct modify_info info = { 0 }; + struct mm_walk_ops walk_ops = { + .hugetlb_entry = modify_hugetlb_prot, + }; + + info.cacheable = cacheable; + unsigned long start = (uintptr_t)va; + unsigned long end = start + size; + + mmap_read_lock(mm); + walk_page_range(mm, start, end, &walk_ops, &info); + mmap_read_unlock(mm); + obmm_flush_tlb(mm); + + pr_debug("scan [%p-%#lx]\n", va, (uintptr_t)va + size); + pr_debug("\tpmd: %d\n", info.pmd_cnt); + pr_debug("\tpmd leaf: %d\n", info.pmd_leaf_cnt); + pr_debug("\tpte: %d\n", info.pte_cnt); + pr_debug("\thugetlb: %d\n", info.hugetlb_cnt); + return 0; +} diff --git a/drivers/ub/obmm/obmm_cache.h b/drivers/ub/obmm/obmm_cache.h new file mode 100644 index 000000000000..28da446e0666 --- /dev/null +++ b/drivers/ub/obmm/obmm_cache.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ +#ifndef OBMM_CACHE_H +#define OBMM_CACHE_H + +#include +#include "obmm_core.h" + +int ub_write_queue_flush(uint32_t scna); + +/* This function serializes all cache flush request issued by OBMM to avoid + * hardware resource contention + */ +int flush_cache_by_pa(phys_addr_t addr, size_t size, unsigned long cache_ops); +int obmm_region_flush_range(struct obmm_region *reg, unsigned long offset, unsigned long length, + uint8_t cache_ops); +void obmm_flush_tlb(struct mm_struct *mm); +/* Caller must guarantee that there is no concurrent modify requests made to the same va range. 
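+ Only hugetlb-backed mappings are rewritten by the current walk (only a
+ .hugetlb_entry callback is installed), and the whole address space's TLB
+ entries are invalidated afterwards via obmm_flush_tlb().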
*/ +int modify_pgtable_prot(struct mm_struct *mm, void *va, size_t size, bool cacheable); +int obmm_cache_clear(void); + +/* Defined in drivers/soc/hisilicon, exported but not declared in their header file. */ +extern int hisi_soc_cache_maintain(phys_addr_t addr, size_t size, + enum hisi_soc_cache_maint_type maint_type); + +#endif diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index f6cff30b0cc3..3bd76749efdc 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -20,6 +20,7 @@ #include +#include "obmm_cache.h" #include "obmm_core.h" size_t __obmm_memseg_size; diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index edff403e3ef7..f01c413659fa 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -90,6 +90,8 @@ struct obmm_region *search_get_obmm_region(int regionid); struct obmm_region *search_deactivate_obmm_region(int regionid); void put_obmm_region(struct obmm_region *region); +void obmm_region_flush(struct obmm_region *reg, unsigned long cache_ops); + /* Extended information of a byte address */ struct obmm_ext_addr { /* OBMM related */ diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h index 98825d53390b..65e2a35f1bb0 100644 --- a/include/uapi/ub/obmm.h +++ b/include/uapi/ub/obmm.h @@ -34,6 +34,21 @@ struct obmm_cmd_addr_query { #define OBMM_CMD_ADDR_QUERY _IOWR('x', 4, struct obmm_cmd_addr_query) +/* cache maintenance operations (not states) */ +/* no cache maintenance (no-op) */ +#define OBMM_SHM_CACHE_NONE 0x0 +/* invalidate only (in-cache modifications may not be written back to DRAM) */ +#define OBMM_SHM_CACHE_INVAL 0x1 +/* write back and invalidate */ +#define OBMM_SHM_CACHE_WB_INVAL 0x2 +/* write back only */ +#define OBMM_SHM_CACHE_WB_ONLY 0x3 +/* Automatically choose the cache maintenance action depending on the memory + * state. The resulting choice always makes sure no data would be lost, and might + * be more conservative than necessary. + */ +#define OBMM_SHM_CACHE_INFER 0x4 + #if defined(__cplusplus) } #endif -- Gitee From 56c71bdf1d35032a31aeada11c1f7cbbfe0487df Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:40 +0800 Subject: [PATCH 11/48] obmm: Add continuous memory allocator for OBMM framework commit 131c090885fb3614fabcd71a8c6216394971a720 openEuler Implement a continuous memory allocator that supports allocating large chunks of memory with fixed-size granularity. The allocator maintains an internal memory pool that continuously requests memory from the system to maintain a certain watermark level, enabling fast memory allocation for OBMM. Key features: - Fixed-size granularity allocation for large contiguous regions - Background memory pool management with expansion/contraction - Dedicated worker threads for memory clearing and pool maintenance - Support for poisoning and isolating problematic memory segments This allocator addresses the need for efficient allocation of large memory blocks that regular page allocators cannot satisfy, while providing mechanisms for proper memory hygiene and system resource management.
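A minimal usage sketch of the new interface (the my_* callbacks below are
placeholders, not part of this patch; only need_expand/expand_size are
mandatory, as enforced by conti_mem_allocator_init()):

	static const struct conti_mempool_ops my_ops = {
		.clear_memseg      = my_clear_memseg,
		.pool_alloc_memseg = my_alloc_memseg,
		.pool_free_memseg  = my_free_memseg,
		.need_expand       = my_need_expand,
		.expand_size       = my_expand_size,
	};
	struct conti_mem_allocator a;
	LIST_HEAD(segs);

	if (!conti_mem_allocator_init(&a, nid, PMD_SIZE, &my_ops, "demo/%d", nid)) {
		/* request two cleared granules, allowing the synchronous slow path */
		if (conti_alloc_memory(&a, 2 * PMD_SIZE, &segs, true, true) == 2 * PMD_SIZE)
			conti_free_memory(&a, &segs);
		conti_mem_allocator_deinit(&a);
	}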
Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 3 +- drivers/ub/obmm/conti_mem_allocator.c | 579 ++++++++++++++++++++++++++ drivers/ub/obmm/conti_mem_allocator.h | 121 ++++++ 3 files changed, 702 insertions(+), 1 deletion(-) create mode 100644 drivers/ub/obmm/conti_mem_allocator.c create mode 100644 drivers/ub/obmm/conti_mem_allocator.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index b8fb20d219fb..11e70865da1a 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0+ obmm-y := obmm_core.o \ - obmm_cache.o + obmm_cache.o \ + conti_mem_allocator.o obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/conti_mem_allocator.c b/drivers/ub/obmm/conti_mem_allocator.c new file mode 100644 index 000000000000..5b3cfe1d1e90 --- /dev/null +++ b/drivers/ub/obmm/conti_mem_allocator.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ +#define pr_fmt(fmt) "OBMM: conti_mem:" fmt +#include +#include +#include + +#include "conti_mem_allocator.h" + +static atomic_t pool_thread_should_pause = ATOMIC_INIT(0); + +static int conti_clear_memseg(struct conti_mem_allocator *a, struct memseg_node *node) +{ + if (a->ops->clear_memseg) + return a->ops->clear_memseg(a, node); + return -EOPNOTSUPP; +} + +static void conti_pool_free_memseg(struct conti_mem_allocator *a, struct memseg_node *node) +{ + if (a->ops->pool_free_memseg) { + pr_debug("free memseg: nid %d\n", a->nid); + a->ops->pool_free_memseg(a, node); + } +} + +static struct memseg_node *conti_pool_alloc_memseg(struct conti_mem_allocator *a) +{ + if (a->ops->pool_alloc_memseg) { + pr_debug("alloc memseg: nid %d\n", a->nid); + return a->ops->pool_alloc_memseg(a); + } else { + return NULL; + } +} + +static bool conti_has_poisoned_memseg(struct conti_mem_allocator *a) +{ + /* this lockless read is safe and is intended */ + return !list_empty(&a->memseg_poisoned); +} + +static bool conti_need_contract(struct conti_mem_allocator *a) +{ + if (a->ops->need_contract) + return a->ops->need_contract(a); + else + return false; +} + +static size_t conti_contract_size(struct conti_mem_allocator *a) +{ + if (a->ops->contract_size) + return a->ops->contract_size(a); + else + return 0; +} + +static bool conti_need_expand(struct conti_mem_allocator *a) +{ + if (a->ops->need_expand) + return a->ops->need_expand(a); + else + return false; +} + +static size_t conti_expand_size(struct conti_mem_allocator *a) +{ + if (a->ops->expand_size) + return a->ops->expand_size(a); + else + return 0; +} + +size_t conti_mem_allocator_expand(struct conti_mem_allocator *allocator, size_t size) +{ + unsigned long count, flags; + struct memseg_node *node; + size_t expand_size; + + if (size == 0 || size % allocator->granu) { + pr_err("size %#zx is zero or not aligned with allocator->granu.\n", size); + return 0; + } + + count = size / allocator->granu; + while (count > 0 && atomic_read(&pool_thread_should_pause) == 0) { + node = conti_pool_alloc_memseg(allocator); + if (!node) + break; + + spin_lock_irqsave(&allocator->lock, flags); + list_add_tail(&node->list, &allocator->memseg_uncleared); + spin_unlock_irqrestore(&allocator->lock, flags); + count--; + } + + if (allocator->clear_work) + wake_up_interruptible(&allocator->clear_wq); + + expand_size = size - count * allocator->granu; + atomic64_add(expand_size, 
&allocator->pooled_mem_size); + if (expand_size > 0) + pr_debug("%s: expand expect size %#zx, actual size %#zx\n", current->comm, size, + expand_size); + + return expand_size; +} + +size_t conti_mem_allocator_contract(struct conti_mem_allocator *allocator, size_t size) +{ + struct list_head contract_list; + struct memseg_node *node, *tmp; + unsigned long count, flags; + size_t contract_size; + + if (size == 0 || size % allocator->granu) { + pr_err_ratelimited("size %#zx is zero or not aligned with allocator->granu.\n", + size); + return 0; + } + + count = size / allocator->granu; + if (allocator->ops->pool_free_memseg == NULL) + return 0; + + INIT_LIST_HEAD(&contract_list); + spin_lock_irqsave(&allocator->lock, flags); + list_for_each_entry_safe(node, tmp, &allocator->memseg_uncleared, list) { + list_move_tail(&node->list, &contract_list); + count--; + if (count == 0) + goto done; + } + + list_for_each_entry_safe(node, tmp, &allocator->memseg_ready, list) { + list_move_tail(&node->list, &contract_list); + count--; + if (count == 0) + goto done; + } + +done: + spin_unlock_irqrestore(&allocator->lock, flags); + list_for_each_entry_safe(node, tmp, &contract_list, list) { + list_del(&node->list); + conti_pool_free_memseg(allocator, node); + } + contract_size = size - count * allocator->granu; + atomic64_sub(contract_size, &allocator->pooled_mem_size); + if (contract_size > 0) + pr_debug("%s: nid: %d, contract expect size %#zx, actual size %#zx\n", + current->comm, allocator->nid, size, contract_size); + + return contract_size; +} + +static size_t conti_mem_allocator_free_poisoned(struct conti_mem_allocator *allocator) +{ + LIST_HEAD(free_list); + struct memseg_node *node, *tmp; + size_t free_size = 0; + unsigned long flags; + + if (allocator->ops->pool_free_memseg == NULL) { + pr_debug("%s: no means to free poisoned memseg.\n", __func__); + return 0; + } + + spin_lock_irqsave(&allocator->lock, flags); + list_splice_init(&allocator->memseg_poisoned, &free_list); + spin_unlock_irqrestore(&allocator->lock, flags); + + list_for_each_entry_safe(node, tmp, &free_list, list) { + list_del(&node->list); + conti_pool_free_memseg(allocator, node); + free_size += allocator->granu; + } + /* The memory freed by this function has already been subtracted from pooled memory size + * when isolated. 
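+ * The subtraction happens in conti_mem_allocator_isolate_memseg(), so the
+ * counter is deliberately not touched again here.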
+ */ + if (free_size > 0) + pr_debug("%s: nid: %d, %#zx poisoned memory freed\n", current->comm, allocator->nid, + free_size); + + return free_size; +} + +void conti_free_memory(struct conti_mem_allocator *allocator, struct list_head *head) +{ + size_t freed_size = 0; + struct memseg_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, head, list) { + freed_size += allocator->granu; + list_del(&node->list); + conti_pool_free_memseg(allocator, node); + pr_debug("allocator: freed: %d: 0x%llx + 0x%lx\n", allocator->nid, node->addr, + node->size); + } + + atomic64_sub(freed_size, &allocator->pooled_mem_size); + atomic64_sub(freed_size, &allocator->used_mem_size); + pr_debug("%s: freed_size %#zx on node %d.\n", current->comm, freed_size, allocator->nid); +} + +static size_t conti_alloc_memory_slow(struct conti_mem_allocator *allocator, size_t size, + struct list_head *head, bool clear) +{ + struct memseg_node *node; + size_t allocated = 0; + int ret; + + while (size) { + node = conti_pool_alloc_memseg(allocator); + if (!node) + break; + + if (clear) { + ret = conti_clear_memseg(allocator, node); + if (ret < 0) { + conti_pool_free_memseg(allocator, node); + break; + } + } + allocated += allocator->granu; + list_add_tail(&node->list, head); + size -= allocator->granu; + } + + atomic64_add(allocated, &allocator->pooled_mem_size); + atomic64_add(allocated, &allocator->used_mem_size); + pr_info("%s: slow allocated %#zx from node %d\n", current->comm, allocated, allocator->nid); + return allocated; +} + +size_t conti_alloc_memory(struct conti_mem_allocator *allocator, size_t size, + struct list_head *head, bool clear, bool allow_slow) +{ + struct list_head *first, *second, *entry, temp_list; + struct memseg_node *node; + size_t allocated = 0, available; + unsigned long flags; + + atomic_inc(&pool_thread_should_pause); + INIT_LIST_HEAD(&temp_list); + if (clear) { + first = &allocator->memseg_ready; + second = &allocator->memseg_uncleared; + } else { + second = &allocator->memseg_ready; + first = &allocator->memseg_uncleared; + } + + spin_lock_irqsave(&allocator->lock, flags); + available = conti_get_avail(allocator); + if (!allow_slow && available < size) { + pr_err("%s: fast alloc failed. nid: %d, request: 0x%lx, available: 0x%lx\n", __func__, + allocator->nid, size, available); + spin_unlock_irqrestore(&allocator->lock, flags); + goto out_continue_pool; + } + list_for_each(entry, first) { + if (allocated >= size) + break; + allocated += allocator->granu; + pr_debug("alloc 1 node from %s list.\n", clear ? "cleared" : "uncleared"); + } + list_cut_before(head, first, entry); + + list_for_each(entry, second) { + if (allocated >= size) + break; + allocated += allocator->granu; + pr_debug("alloc 1 node from %s list.\n", !clear ? "cleared" : "uncleared"); + } + list_cut_before(&temp_list, second, entry); + spin_unlock_irqrestore(&allocator->lock, flags); + + atomic64_add(allocated, &allocator->used_mem_size); + + /* now: head collects elements from the first list, temp_list holds elements taken from the + * second list. When the caller requests cleared data, all nodes in + * temp_list must be cleared synchronously. 
+ */ + if (clear) + list_for_each_entry(node, &temp_list, list) + conti_clear_memseg(allocator, node); + list_splice(&temp_list, head); + + if (allocated < size) + allocated += conti_alloc_memory_slow(allocator, size - allocated, head, clear); + + list_for_each_entry(node, head, list) { + pr_debug("allocator: allocated: %d: 0x%llx + 0x%lx\n", allocator->nid, node->addr, + node->size); + } + pr_info("%s: allocated %#zx from node %d\n", current->comm, allocated, allocator->nid); + +out_continue_pool: + atomic_dec(&pool_thread_should_pause); + + /* not aligned */ + WARN_ON(allocated > size); + + return allocated; +} + +bool conti_mem_allocator_isolate_memseg(struct conti_mem_allocator *a, unsigned long addr) +{ + struct memseg_node *node; + bool found = false; + unsigned long flags; + + if (!a->initialized) + return false; + addr = ALIGN_DOWN(addr, a->granu); + spin_lock_irqsave(&a->lock, flags); + list_for_each_entry(node, &a->memseg_ready, list) { + if (node->addr == addr) { + pr_debug("isolate memseg from cleared pool.\n"); + list_move(&node->list, &a->memseg_poisoned); + found = true; + goto out; + } + } + list_for_each_entry(node, &a->memseg_uncleared, list) { + if (node->addr == addr) { + pr_debug("isolate memseg from uncleared pool.\n"); + list_move(&node->list, &a->memseg_poisoned); + found = true; + goto out; + } + } + if (a->memseg_clearing && a->memseg_clearing->addr == addr) + pr_warn("memseg to isolate is being cleared; isolation failed.\n"); + else + pr_debug("memseg to isolate not found in pooled allocator of nid=%d.\n", a->nid); + +out: + spin_unlock_irqrestore(&a->lock, flags); + if (found) + atomic64_sub(a->granu, &a->pooled_mem_size); + return found; +} + +static int conti_clear_thread(void *p) +{ + struct conti_mem_allocator *allocator = p; + struct memseg_node *node; + int ret; + unsigned long flags; + + pr_debug("%s: nid=%d, start\n", __func__, allocator->nid); + allocator->memseg_clearing = NULL; + while (!kthread_should_stop()) { + wait_event_interruptible(allocator->clear_wq, + !list_empty(&allocator->memseg_uncleared) || + kthread_should_stop()); + + if (kthread_should_stop()) + break; + spin_lock_irqsave(&allocator->lock, flags); + if (list_empty(&allocator->memseg_uncleared)) { + spin_unlock_irqrestore(&allocator->lock, flags); + continue; + } + + node = list_first_entry(&allocator->memseg_uncleared, struct memseg_node, list); + list_del(&node->list); + allocator->memseg_clearing = node; + + pr_debug("clearing: %d: %pa + 0x%lx\n", allocator->nid, &node->addr, node->size); + spin_unlock_irqrestore(&allocator->lock, flags); + ret = conti_clear_memseg(allocator, node); + pr_debug("%s: nid=%d, clear done node=%p, addr=%pa\n", __func__, allocator->nid, + node, &node->addr); + + spin_lock_irqsave(&allocator->lock, flags); + allocator->memseg_clearing = NULL; + if (ret) + list_add(&node->list, &allocator->memseg_uncleared); + else + list_add(&node->list, &allocator->memseg_ready); + spin_unlock_irqrestore(&allocator->lock, flags); + } + pr_debug("%s: nid=%d, exit\n", __func__, allocator->nid); + + return 0; +} + +static int clear_thread_init(struct conti_mem_allocator *allocator) +{ + struct task_struct *work; + + work = kthread_create_on_node(conti_clear_thread, allocator, allocator->nid, + "conti_clear_%s", allocator->name); + if (IS_ERR(work)) { + pr_err("failed to init conti_clear task\n"); + return -ENODEV; + } + (void)wake_up_process(work); + + allocator->clear_work = work; + + return 0; +} + +#define POOL_THREAD_SLEEP_JIFFIES msecs_to_jiffies(5000) +static int 
conti_pool_thread(void *p) +{ + struct conti_mem_allocator *allocator = p; + size_t size, ret_size; + + pr_debug("%s: nid=%d, start\n", __func__, allocator->nid); + while (!kthread_should_stop()) { + wait_event_interruptible_timeout(allocator->pool_wq, + atomic_read(&pool_thread_should_pause) == 0 && + (conti_has_poisoned_memseg(allocator) || + conti_need_contract(allocator) || + conti_need_expand(allocator) || + kthread_should_stop()), + POOL_THREAD_SLEEP_JIFFIES); + + if (kthread_should_stop()) + break; + + if (conti_has_poisoned_memseg(allocator)) { + ret_size = conti_mem_allocator_free_poisoned(allocator); + pr_debug("%s: nid=%d, free poisoned done, ret=%#zx\n", __func__, + allocator->nid, ret_size); + } + + if (conti_need_contract(allocator)) { + size = conti_contract_size(allocator); + if (size > 0) { + pr_debug("%s: nid=%d, size=%#lx start contract\n", __func__, + allocator->nid, size); + ret_size = conti_mem_allocator_contract(allocator, size); + if (ret_size) + pr_debug("%s: nid=%d, contract done, ret=%#zx\n", __func__, + allocator->nid, ret_size); + } + } + + if (conti_need_expand(allocator)) { + size = conti_expand_size(allocator); + if (size > 0) { + pr_debug("%s: nid=%d, start expand\n", __func__, allocator->nid); + ret_size = conti_mem_allocator_expand(allocator, size); + if (ret_size) + pr_debug("%s: nid=%d, expand done, ret=%#zx\n", __func__, + allocator->nid, ret_size); + } + } + } + pr_debug("%s: nid=%d, exit\n", __func__, allocator->nid); + + return 0; +} + +static int pool_thread_init(struct conti_mem_allocator *allocator) +{ + struct task_struct *work; + + init_waitqueue_head(&allocator->pool_wq); + work = kthread_create_on_node(conti_pool_thread, allocator, allocator->nid, "conti_pool_%s", + allocator->name); + if (IS_ERR(work)) { + pr_err("failed to init conti_pool task\n"); + return -ENODEV; + } + (void)wake_up_process(work); + + allocator->pool_work = work; + + return 0; +} + +int conti_mem_allocator_init(struct conti_mem_allocator *allocator, int nid, size_t granu, + const struct conti_mempool_ops *ops, const char *fmt, ...) 
+{ + va_list ap; + int ret; + + if (!allocator || !ops) { + pr_err("%s: null pointer.", __func__); + return -EINVAL; + } + if (!ops->need_expand || !ops->expand_size) { + pr_err("expand ops is required.\n"); + return -EINVAL; + } + if (!IS_ALIGNED(granu, PAGE_SIZE) || granu == 0) { + pr_err("invalid granu size %#lx.\n", granu); + return -EINVAL; + } + + va_start(ap, fmt); + allocator->name = kvasprintf(GFP_KERNEL, fmt, ap); + va_end(ap); + if (!allocator->name) + return -ENOMEM; + + allocator->nid = nid; + allocator->granu = granu; + atomic64_set(&allocator->pooled_mem_size, 0); + atomic64_set(&allocator->used_mem_size, 0); + spin_lock_init(&allocator->lock); + INIT_LIST_HEAD(&allocator->memseg_ready); + init_waitqueue_head(&allocator->clear_wq); + INIT_LIST_HEAD(&allocator->memseg_uncleared); + allocator->memseg_clearing = NULL; + INIT_LIST_HEAD(&allocator->memseg_poisoned); + + allocator->ops = ops; + + if (ops->clear_memseg) { + ret = clear_thread_init(allocator); + if (ret) { + kfree(allocator->name); + allocator->name = NULL; + return ret; + } + } + + ret = pool_thread_init(allocator); + if (ret) { + if (allocator->clear_work) + kthread_stop(allocator->clear_work); + kfree(allocator->name); + allocator->name = NULL; + return ret; + } + + allocator->initialized = true; + + return 0; +} + +void conti_mem_allocator_deinit(struct conti_mem_allocator *allocator) +{ + struct memseg_node *node, *tmp; + struct list_head free_list; + unsigned long flags; + + INIT_LIST_HEAD(&free_list); + if (allocator->pool_work) + kthread_stop(allocator->pool_work); + + if (allocator->clear_work) + kthread_stop(allocator->clear_work); + + kfree(allocator->name); + if (!allocator->ops->pool_free_memseg) { + pr_err("pool_free_memseg is not defined.\n"); + return; + } + + /* Release all memory nodes chained in memseg_uncleared, memseg_ready + * and memseg_poisoned. + * NOTE: No memory node will be held in allocator->memseg_clearing after + * the clear worker stops working. + */ + spin_lock_irqsave(&allocator->lock, flags); + list_splice(&allocator->memseg_uncleared, &free_list); + list_splice(&allocator->memseg_ready, &free_list); + list_splice(&allocator->memseg_poisoned, &free_list); + spin_unlock_irqrestore(&allocator->lock, flags); + + list_for_each_entry_safe(node, tmp, &free_list, list) { + list_del(&node->list); + conti_pool_free_memseg(allocator, node); + } + memset(allocator, 0, sizeof(*allocator)); +} diff --git a/drivers/ub/obmm/conti_mem_allocator.h b/drivers/ub/obmm/conti_mem_allocator.h new file mode 100644 index 000000000000..812f0b07d8ab --- /dev/null +++ b/drivers/ub/obmm/conti_mem_allocator.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#ifndef CONTI_MEM_ALLOC +#define CONTI_MEM_ALLOC + +#include +#include +#include +#include + +struct memseg_node { + phys_addr_t addr; + size_t size; + struct list_head list; +}; + +struct conti_mem_allocator; + +/** + * struct conti_mempool_ops - Memory pool operation callbacks for the allocator + * + * This structure defines a set of callback functions that customize the + * behavior of the memory allocator for different memory management strategies. + * Each function pointer implements specific operations required for memory + * allocation, deallocation, and pool management. 
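+ *
+ * Only @need_expand and @expand_size are mandatory; conti_mem_allocator_init()
+ * rejects an ops table without them. All other callbacks may be left NULL, in
+ * which case the allocator falls back to safe defaults (no clearing, no
+ * contraction, allocation failure).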
+ * + * @clear_memseg: Clear the memory segment's data (e.g., zeroing or secure erase) + * @allocator: Pointer to the memory allocator instance + * @memseg: Memory segment to be cleared + * Return: 0 for success, or an error on failure + * + * @pool_free_memseg: Return a memory segment to the pool for reuse + * @allocator: Pointer to the memory allocator instance + * @memseg: Memory segment to be freed back to the pool + * + * @pool_alloc_memseg: Allocate a new memory segment from the underlying memory source + * @allocator: Pointer to the memory allocator instance + * Return: A newly allocated memory segment, or NULL on failure + * + * @need_contract: Check if the memory pool should be shrunk + * @allocator: Pointer to the memory allocator instance + * Return: true if contraction is needed, false otherwise + * + * @contract_size: Calculate the size to contract the memory pool + * @allocator: Pointer to the memory allocator instance + * Return: The size (in bytes) to reduce the pool, or 0 if no contraction + * + * @need_expand: Check if the memory pool should be expanded + * @allocator: Pointer to the memory allocator instance + * Return: true if expansion is needed, false otherwise + * + * @expand_size: Calculate the size to expand the memory pool + * @allocator: Pointer to the memory allocator instance + * Return: The size (in bytes) to increase the pool, or 0 if no expansion + */ +struct conti_mempool_ops { + int (*clear_memseg)(struct conti_mem_allocator *allocator, struct memseg_node *node); + void (*pool_free_memseg)(struct conti_mem_allocator *allocator, struct memseg_node *node); + struct memseg_node *(*pool_alloc_memseg)(struct conti_mem_allocator *allocator); + bool (*need_contract)(struct conti_mem_allocator *allocator); + size_t (*contract_size)(struct conti_mem_allocator *allocator); + bool (*need_expand)(struct conti_mem_allocator *allocator); + size_t (*expand_size)(struct conti_mem_allocator *allocator); +}; + +struct conti_mem_allocator { + bool initialized; + + int nid; + size_t granu; + + atomic64_t pooled_mem_size; + atomic64_t used_mem_size; + + spinlock_t lock; + struct list_head memseg_ready; + struct list_head memseg_uncleared; + struct memseg_node *memseg_clearing; + struct list_head memseg_poisoned; + + struct task_struct *clear_work; + struct wait_queue_head clear_wq; + + struct task_struct *pool_work; + struct wait_queue_head pool_wq; + + const struct conti_mempool_ops *ops; + const char *name; +}; + +static inline size_t conti_get_total(struct conti_mem_allocator *a) +{ + return atomic64_read(&a->pooled_mem_size); +} + +static inline size_t conti_get_avail(struct conti_mem_allocator *a) +{ + return atomic64_read(&a->pooled_mem_size) - atomic64_read(&a->used_mem_size); +} + +int conti_mem_allocator_init(struct conti_mem_allocator *allocator, int nid, size_t granu, + const struct conti_mempool_ops *ops, const char *fmt, ...) 
+ __printf(5, 6); +void conti_mem_allocator_deinit(struct conti_mem_allocator *allocator); + +void conti_free_memory(struct conti_mem_allocator *allocator, struct list_head *head); + +size_t conti_alloc_memory(struct conti_mem_allocator *allocator, size_t size, + struct list_head *head, bool zero, bool allow_slow); + +size_t conti_mem_allocator_expand(struct conti_mem_allocator *allocator, size_t size); + +size_t conti_mem_allocator_contract(struct conti_mem_allocator *allocator, size_t size); + +bool conti_mem_allocator_isolate_memseg(struct conti_mem_allocator *allocator, unsigned long addr); + +#endif -- Gitee From a6172eaf7689bdf599edd846d7740edb235a4c68 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:40 +0800 Subject: [PATCH 12/48] obmm: Implement memory pool allocator commit 84895087fb79557b59f61f010d2ebd138ffd5ec7 openEuler Implement the memory pool allocator for OBMM framework to provide efficient memory allocation and management capabilities. Implementation includes: - ubmempool_allocator: Main memory pool implementation - Pool growth and shrink algorithms - Memory fragmentation management - Performance optimizations for high-frequency allocations The memory pool improves allocation performance and reduces memory fragmentation for OBMM operations. Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 3 +- drivers/ub/obmm/conti_mem_allocator.h | 3 +- drivers/ub/obmm/obmm_core.c | 9 + drivers/ub/obmm/obmm_core.h | 4 + drivers/ub/obmm/ubmempool_allocator.c | 669 ++++++++++++++++++++++++++ drivers/ub/obmm/ubmempool_allocator.h | 21 + 6 files changed, 706 insertions(+), 3 deletions(-) create mode 100644 drivers/ub/obmm/ubmempool_allocator.c create mode 100644 drivers/ub/obmm/ubmempool_allocator.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 11e70865da1a..4d5aff8a5115 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -3,6 +3,7 @@ obmm-y := obmm_core.o \ obmm_cache.o \ - conti_mem_allocator.o + conti_mem_allocator.o \ + ubmempool_allocator.o obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/conti_mem_allocator.h b/drivers/ub/obmm/conti_mem_allocator.h index 812f0b07d8ab..84cf24b64b4b 100644 --- a/drivers/ub/obmm/conti_mem_allocator.h +++ b/drivers/ub/obmm/conti_mem_allocator.h @@ -103,8 +103,7 @@ static inline size_t conti_get_avail(struct conti_mem_allocator *a) } int conti_mem_allocator_init(struct conti_mem_allocator *allocator, int nid, size_t granu, - const struct conti_mempool_ops *ops, const char *fmt, ...) - __printf(5, 6); + const struct conti_mempool_ops *ops, const char *fmt, ...); void conti_mem_allocator_deinit(struct conti_mem_allocator *allocator); void conti_free_memory(struct conti_mem_allocator *allocator, struct list_head *head); diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index 3bd76749efdc..f7b30422c32a 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -21,6 +21,7 @@ #include #include "obmm_cache.h" +#include "ubmempool_allocator.h" #include "obmm_core.h" size_t __obmm_memseg_size; @@ -400,6 +401,12 @@ static int __init obmm_init(void) pr_info("obmm_module: init started\n"); + ret = ubmempool_allocator_init(); + if (ret) { + pr_err("Failed to init allocator. ret=%pe\n", ERR_PTR(ret)); + return ret; + } + ret = misc_register(&obmm_dev_handle); if (ret) { pr_err("Failed to register root device. 
ret=%pe\n", ERR_PTR(ret)); @@ -413,6 +420,7 @@ static int __init obmm_init(void) return ret; out_allocator_exit: + ubmempool_allocator_exit(); return ret; } @@ -421,6 +429,7 @@ static void __exit obmm_exit(void) pr_info("obmm_module: exit started\n"); misc_deregister(&obmm_dev_handle); + ubmempool_allocator_exit(); pr_info("obmm_module: exit completed\n"); } diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index f01c413659fa..8e9e128a436c 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -76,6 +76,10 @@ struct obmm_region { unsigned char priv[OBMM_MAX_PRIV_LEN]; }; +struct mem_description_pool { + struct list_head head[OBMM_MAX_LOCAL_NUMA_NODES]; +}; + struct obmm_ctx_info { /* active */ struct list_head regions; diff --git a/drivers/ub/obmm/ubmempool_allocator.c b/drivers/ub/obmm/ubmempool_allocator.c new file mode 100644 index 000000000000..a0a3e7060edd --- /dev/null +++ b/drivers/ub/obmm/ubmempool_allocator.c @@ -0,0 +1,669 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "obmm_core.h" +#include "conti_mem_allocator.h" +#include "obmm_cache.h" +#include "ubmempool_allocator.h" + +#define DEFAULT_MEMPOOL_SIZE "1G" +static char *mempool_size = DEFAULT_MEMPOOL_SIZE; +module_param(mempool_size, charp, 0440); +MODULE_PARM_DESC(mempool_size, "Max available cached memory in total."); + +/* contract 1T when receiving a lowmem notification */ +static size_t mempool_contract_size = 1ULL << 40; + +static int mempool_refill_timeout = 30000; +module_param(mempool_refill_timeout, int, 0440); +MODULE_PARM_DESC(mempool_refill_timeout, + "After detecting a memory shortage, attempt to expand the memory pool again after a period of time."); + +struct mem_allocator { + struct timer_list refill_timer; + struct conti_mem_allocator allocator; + size_t pool_size; + bool can_expand; +}; + +static inline struct mem_allocator *refill_timer_to_mem_allocator(struct timer_list *timer) +{ + return container_of(timer, struct mem_allocator, refill_timer); +} +static inline struct mem_allocator * +conti_mem_to_mem_allocator(struct conti_mem_allocator *allocator) +{ + return container_of(allocator, struct mem_allocator, allocator); +} + +static void refill_timeout(struct timer_list *timer) +{ + struct mem_allocator *m = refill_timer_to_mem_allocator(timer); + + m->can_expand = true; +} + +static struct mem_allocator mem_allocators[OBMM_MAX_LOCAL_NUMA_NODES]; + +static void pool_delay_expand(int nid) +{ + if (is_online_local_node(nid) && mem_allocators[nid].allocator.initialized) { + mem_allocators[nid].can_expand = false; + mem_allocators[nid].refill_timer.expires = + jiffies + msecs_to_jiffies(mempool_refill_timeout); + mod_timer(&mem_allocators[nid].refill_timer, + mem_allocators[nid].refill_timer.expires); + } +} + +void free_memory_contiguous(struct mem_description_pool *desc) +{ + int i; + + for (i = 0; i < OBMM_MAX_LOCAL_NUMA_NODES; i++) { + if (list_empty(&desc->head[i])) + continue; + + conti_free_memory(&mem_allocators[i].allocator, &desc->head[i]); + } +} + +static int clear_block(struct conti_mem_allocator *a __always_unused, struct memseg_node *p) +{ + phys_addr_t pa = p->addr; + size_t size = p->size; + void *va; + int ret; + + ret = set_linear_mapping_invalid(pa >> PAGE_SHIFT, (pa + size) >> PAGE_SHIFT, 
false); + if (ret < 0) { + pr_err_ratelimited("%s: error setting kernel pagetable; set_inval=false", __func__); + return ret; + } + va = ioremap_cache(pa, size); + if (!va) { + pr_err_ratelimited("%s: failed to run ioremap.\n", __func__); + goto out_recover_kernel_pagetable; + } + memset(va, 0, size); + iounmap(va); + /* flush cache after set_linear_mapping_invalid */ + ret = set_linear_mapping_invalid(pa >> PAGE_SHIFT, (pa + size) >> PAGE_SHIFT, true); + if (ret < 0) { + pr_err_ratelimited("%s: error setting kernel pagetable; set_inval=false", __func__); + return ret; + } + ret = flush_cache_by_pa(pa, size, OBMM_SHM_CACHE_WB_INVAL); + if (ret) { + pr_err_ratelimited("%s: failed to flush cache: %d; retval=%d\n", __func__, + OBMM_SHM_CACHE_INVAL, ret); + return ret; + } + return 0; +out_recover_kernel_pagetable: + WARN_ON(set_linear_mapping_invalid(pa >> PAGE_SHIFT, (pa + size) >> PAGE_SHIFT, true)); + return ret; +} + +int allocate_memory_contiguous(uint64_t size[], int length, struct mem_description_pool *desc, + bool zero, bool allow_slow) +{ + int i, ret; + struct list_head head; + size_t allocated; + + for (i = 0; i < OBMM_MAX_LOCAL_NUMA_NODES; i++) + INIT_LIST_HEAD(&desc->head[i]); + + for (i = 0; i < length; i++) { + INIT_LIST_HEAD(&head); + if (size[i] == 0) + continue; + if (!mem_allocators[i].allocator.initialized) { + pr_err("%s:no allocator working on node %d\n", __func__, i); + ret = -ENODEV; + goto err_free_memory; + } + if (size[i] % mem_allocators[i].allocator.granu) { + pr_err("%s: size %#llx not aligned to allocator granu %#lx on node %d\n", + __func__, size[i], mem_allocators[i].allocator.granu, i); + ret = -EINVAL; + goto err_free_memory; + } + allocated = conti_alloc_memory(&mem_allocators[i].allocator, size[i], &head, zero, + allow_slow); + list_replace(&head, &desc->head[i]); + if (allocated < size[i]) { + pr_err("%s: failed to alloc %#llx bytes for node %d\n", __func__, size[i], + i); + ret = -ENOMEM; + goto err_free_memory; + } + } + + return 0; +err_free_memory: + free_memory_contiguous(desc); + return ret; +} + +static size_t cma_contract_size(struct conti_mem_allocator *a) +{ + struct mem_allocator *m = conti_mem_to_mem_allocator(a); + ssize_t size = (ssize_t)(ALIGN(conti_get_avail(a) - m->pool_size, a->granu)); + + return size > 0 ? size : 0; +} + +static bool cma_need_contract(struct conti_mem_allocator *a) +{ + return cma_contract_size(a) > 0; +} + +static size_t cma_expand_size(struct conti_mem_allocator *a) +{ + struct mem_allocator *m = conti_mem_to_mem_allocator(a); + ssize_t size = (ssize_t)ALIGN_DOWN(m->pool_size - conti_get_avail(a), a->granu); + + return size > 0 ? 
size : 0; +} + +static bool cma_need_expand(struct conti_mem_allocator *a) +{ + struct mem_allocator *m = conti_mem_to_mem_allocator(a); + + if (m->can_expand) + return cma_expand_size(a) > 0; + return false; +} + +static int set_memseg_linear_mapping_invalid(struct memseg_node *node, bool set_nc) +{ + int ret; + unsigned long start_pfn, end_pfn; + + start_pfn = PHYS_PFN(node->addr); + end_pfn = PHYS_PFN(node->addr + node->size); + pr_debug("call external: set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_nc=%d)\n", + start_pfn, end_pfn, set_nc); + ret = set_linear_mapping_invalid(start_pfn, end_pfn, set_nc); + if (ret) { + pr_err("failed to update kernel linear mapping cacheability for segment %#llx+%#lx, error=%pe.\n", + node->addr, node->size, ERR_PTR(ret)); + return ret; + } + pr_debug("external called: set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_nc=%d, ret=%pe)\n", + start_pfn, end_pfn, set_nc, ERR_PTR(ret)); + return 0; +} + +static struct memseg_node *hugetlb_pmd_alloc_memseg(struct conti_mem_allocator *a) +{ + int ret; + struct folio *folio; + struct memseg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (unlikely(!node)) + return NULL; + + folio = hugetlb_pool_alloc_size(a->nid, PMD_SIZE); + if (IS_ERR(folio)) { + pr_debug("hugetlb_pool_alloc(%d) returned %ld.\n", a->nid, PTR_ERR(folio)); + pool_delay_expand(a->nid); + goto out_free_node; + } + + node->size = folio_size(folio); + node->addr = folio_pfn(folio) << PAGE_SHIFT; + if (unlikely(node->size != a->granu)) { + pr_err("hugetlb_pool_alloc() returned folio smaller than expected, expect=%#zx, actual=%#zx\n", + a->granu, node->size); + goto out_free_seg; + } + + pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); + + ret = set_memseg_linear_mapping_invalid(node, true); + if (unlikely(ret)) + goto out_free_seg; + + return node; + +out_free_seg: + ret = hugetlb_pool_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free hugetlb folio on roll back, retval=%d.\n", ret); +out_free_node: + kfree(node); + return NULL; +} + +static void hugetlb_free_memseg(struct conti_mem_allocator *a __always_unused, + struct memseg_node *node) +{ + int ret; + struct folio *folio; + + if (unlikely(!node)) { + pr_err("attempted to free NULL hugetlb memseg.\n"); + return; + } + + pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); + + folio = pfn_folio(node->addr >> PAGE_SHIFT); + + set_memseg_linear_mapping_invalid(node, false); + + ret = hugetlb_pool_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free hugetlb folio, ret=%pe.\n", ERR_PTR(ret)); + + kfree(node); +} + +static struct memseg_node *hugetlb_pud_alloc_memseg(struct conti_mem_allocator *a) +{ + int ret; + struct folio *folio; + struct memseg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (unlikely(!node)) + return NULL; + + folio = hugetlb_pool_alloc_size(a->nid, PUD_SIZE); + if (IS_ERR(folio)) { + pr_debug("hugetlb_pool_alloc(%d) returned %ld.\n", a->nid, PTR_ERR(folio)); + pool_delay_expand(a->nid); + goto out_free_node; + } + + node->size = folio_size(folio); + node->addr = folio_pfn(folio) << PAGE_SHIFT; + if (unlikely(node->size != a->granu)) { + pr_err("hugetlb_pool_alloc() returned folio smaller than expected, expect=%#zx, actual=%#zx\n", + a->granu, node->size); + goto out_free_seg; + } + + pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); + + ret = set_memseg_linear_mapping_invalid(node, true); + if (unlikely(ret)) + goto out_free_seg; + + return node; + +out_free_seg: + 
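+	/* roll back: return the folio to the hugetlb pool before dropping the tracking node */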
ret = hugetlb_pool_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free hugetlb folio on roll back, ret=%pe.\n", ERR_PTR(ret)); +out_free_node: + kfree(node); + return NULL; +} + +static void buddy_free_memseg(struct conti_mem_allocator *a __always_unused, + struct memseg_node *node) +{ + int ret; + struct folio *folio; + + if (unlikely(!node)) { + pr_err("attempted to free NULL buddy memseg.\n"); + return; + } + + pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); + + folio = pfn_folio(node->addr >> PAGE_SHIFT); + + set_memseg_linear_mapping_invalid(node, false); + + ret = pfn_range_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free buddy folio, ret=%pe.\n", ERR_PTR(ret)); + + kfree(node); +} + +static struct memseg_node *buddy_alloc_memseg(struct conti_mem_allocator *a) +{ + int ret; + struct folio *folio; + struct memseg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (unlikely(!node)) + return NULL; + + folio = pfn_range_alloc(a->granu >> PAGE_SHIFT, a->nid); + if (IS_ERR(folio)) { + pr_debug("pfn_range_alloc(%#lx, %d) returned %ld.\n", a->granu >> PAGE_SHIFT, + a->nid, PTR_ERR(folio)); + pool_delay_expand(a->nid); + goto out_free_node; + } + + node->size = folio_size(folio); + node->addr = folio_pfn(folio) << PAGE_SHIFT; + if (unlikely(node->size != a->granu)) { + pr_err("pfn_range_alloc() returned folio smaller than expected, expect=%#zx, actual=%#zx\n", + a->granu, node->size); + goto out_free_seg; + } + + pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); + + ret = set_memseg_linear_mapping_invalid(node, true); + if (unlikely(ret)) + goto out_free_seg; + + return node; + +out_free_seg: + ret = pfn_range_free(folio); + if (unlikely(ret != 0)) + pr_err("failed to free buddy folio on roll back, ret=%pe.\n", ERR_PTR(ret)); +out_free_node: + kfree(node); + return NULL; +} + +static struct conti_mempool_ops hugetlb_pmd_ops = { + .clear_memseg = clear_block, + .pool_free_memseg = hugetlb_free_memseg, + .pool_alloc_memseg = hugetlb_pmd_alloc_memseg, + .need_contract = cma_need_contract, + .contract_size = cma_contract_size, + .need_expand = cma_need_expand, + .expand_size = cma_expand_size, +}; + +static struct conti_mempool_ops hugetlb_pud_ops = { + .clear_memseg = clear_block, + .pool_free_memseg = hugetlb_free_memseg, + .pool_alloc_memseg = hugetlb_pud_alloc_memseg, + .need_contract = cma_need_contract, + .contract_size = cma_contract_size, + .need_expand = cma_need_expand, + .expand_size = cma_expand_size, +}; + +static struct conti_mempool_ops buddy_ops = { + .clear_memseg = clear_block, + .pool_free_memseg = buddy_free_memseg, + .pool_alloc_memseg = buddy_alloc_memseg, + .need_contract = cma_need_contract, + .contract_size = cma_contract_size, + .need_expand = cma_need_expand, + .expand_size = cma_expand_size, +}; + +static void mem_allocator_uninit_one(int nid) +{ + conti_mem_allocator_deinit(&mem_allocators[nid].allocator); + timer_shutdown_sync(&mem_allocators[nid].refill_timer); +} + +static char *mempool_allocator; +module_param(mempool_allocator, charp, 0440); +MODULE_PARM_DESC(mempool_allocator, + "OBMM mempool allocator. 
If not set, use kernel cmdline pmd_mapping to select."); +static const char * const allocator_names[] = { + "hugetlb_pmd", + "hugetlb_pud", + "buddy_highmem", +}; +static const struct conti_mempool_ops *allocator_ops[] = { + &hugetlb_pmd_ops, + &hugetlb_pud_ops, + &buddy_ops, + NULL +}; +enum allocator_id { + ALLOCATOR_HUGETLB_PMD = 0, + ALLOCATOR_HUGETLB_PUD, + ALLOCATOR_BUDDY_HIGHMEM, + ALLOCATOR_MAX +}; + +static char *mem_allocator_granu; +module_param(mem_allocator_granu, charp, 0440); +MODULE_PARM_DESC(mem_allocator_granu, "Basic granu of obmm memory allocator."); + +static int select_mem_allocator(void) +{ + int i; + + if (!mempool_allocator) { + pr_info("no allocator specified. using buddy_highmem.\n"); + return ALLOCATOR_BUDDY_HIGHMEM; + } + + for (i = 0; i < ALLOCATOR_MAX; i++) { + if (!strcmp(allocator_names[i], mempool_allocator)) + break; + } + if (i == ALLOCATOR_MAX) { + pr_err("invalid mem allocator specified: %s\n", mempool_allocator); + return ALLOCATOR_MAX; + } + if (contig_mem_pool_percent != 100 && i == ALLOCATOR_HUGETLB_PMD) { + pr_err("using allocator %s when pmd_mapping not 100%%\n", allocator_names[i]); + i = ALLOCATOR_MAX; + } + + return i; +} + +static int init_mem_allocator_granu(enum allocator_id aid) +{ + char *p = mem_allocator_granu; + + if (!mem_allocator_granu) { + if (aid == ALLOCATOR_HUGETLB_PUD) + __obmm_memseg_size = PUD_SIZE; + else if (aid == ALLOCATOR_HUGETLB_PMD) + __obmm_memseg_size = PMD_SIZE; + return 0; + } + + __obmm_memseg_size = memparse(p, &p); + if (*p != '\0') { + pr_err("invalid mem_allocator_granu: %s\n", mem_allocator_granu); + return -EINVAL; + } + if (__obmm_memseg_size < OBMM_BASIC_GRANU || __obmm_memseg_size % OBMM_BASIC_GRANU) { + pr_err("unsupported mem_allocator_granu: %s\n", mem_allocator_granu); + return -EINVAL; + } + if (aid == ALLOCATOR_HUGETLB_PUD && __obmm_memseg_size != PUD_SIZE) { + pr_err("unsupported mem_allocator_granu for hugetlb_pud allocator: %s\n", + mem_allocator_granu); + return -EINVAL; + } + if (aid == ALLOCATOR_HUGETLB_PMD && __obmm_memseg_size != PMD_SIZE) { + pr_err("unsupported mem_allocator_granu for hugetlb_pmd allocator: %s\n", + mem_allocator_granu); + return -EINVAL; + } + + return 0; +} + +static int mem_allocator_init_one(int nid, enum allocator_id aid) +{ + struct conti_mem_allocator *allocator = &mem_allocators[nid].allocator; + int ret; + + mem_allocators[nid].can_expand = true; + timer_setup(&mem_allocators[nid].refill_timer, refill_timeout, 0); + + ret = conti_mem_allocator_init(allocator, nid, OBMM_MEMSEG_SIZE, allocator_ops[aid], + "%s/%d", allocator_names[aid], nid); + if (ret) + goto err_del_timer; + + return 0; +err_del_timer: + timer_shutdown_sync(&mem_allocators[nid].refill_timer); + return ret; +} + +size_t ubmempool_contract(int nid, bool is_hugepage) +{ + struct conti_mem_allocator *a; + bool is_hugetlb_pool; + + if (!is_online_local_node(nid)) { + pr_err_ratelimited( + "obmm tried to contract to alleviate lowmem, but nid(%d) is not valid.\n", + nid); + return 0; + } + + pr_debug_ratelimited("contract memory on nid: %d, is_hugepage: %d\n", nid, is_hugepage); + /* try to contract memory only when it is helpful */ + a = &mem_allocators[nid].allocator; + is_hugetlb_pool = a->ops == &hugetlb_pmd_ops || a->ops == &hugetlb_pud_ops; + if (is_hugepage == is_hugetlb_pool) { + pool_delay_expand(nid); + return conti_mem_allocator_contract(a, mempool_contract_size); + } + pr_debug_ratelimited("Not contracting;\n"); + + return 0; +} + +static int mempool_size_parse(char *p, int local_cnt) +{ + int 
nid;
+	size_t mempool = memparse(p, &p);
+
+	if (local_cnt == 0) {
+		pr_err("no local NUMA node present, failed\n");
+		return -EINVAL;
+	}
+
+	for_each_online_local_node(nid) {
+		mem_allocators[nid].pool_size = div64_ul(mempool, local_cnt);
+		pr_info("nid=%d, pool size = %#lx bytes.\n", nid, mem_allocators[nid].pool_size);
+	}
+	return 0;
+}
+
+/* Run in IRQ context. */
+static int handle_ghes_mem_ras(struct notifier_block *nb __always_unused,
+			       unsigned long severity __always_unused, void *data)
+{
+	const struct cper_sec_mem_err *mem_err = (const struct cper_sec_mem_err *)data;
+	unsigned long pfn;
+	int nid;
+
+	/* A defensive check for future Linux versions. Currently GHES events
+	 * are handled in IRQ context. If handling ever switches to NMI
+	 * context, this handler no longer works.
+	 */
+	if (WARN_ON_ONCE(in_nmi()))
+		return NOTIFY_DONE;
+
+	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+		return NOTIFY_DONE;
+
+	pfn = PHYS_PFN(mem_err->physical_addr);
+	if (!pfn_valid(pfn))
+		return NOTIFY_DONE;
+
+	nid = page_to_nid(pfn_to_page(pfn));
+	if (!is_online_local_node(nid))
+		return NOTIFY_DONE;
+
+	(void)conti_mem_allocator_isolate_memseg(&mem_allocators[nid].allocator,
+						 mem_err->physical_addr);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block ghes_mem_ras_notifier_block = {
+	.notifier_call = handle_ghes_mem_ras,
+	.priority = 0,
+};
+
+int ubmempool_allocator_init(void)
+{
+	int i, j, nid, ret = 0, local_cnt = 0;
+	enum allocator_id aid;
+
+	for_each_online_local_node(nid) {
+		if (nid >= OBMM_MAX_LOCAL_NUMA_NODES) {
+			/* bail out: otherwise mem_allocators[nid] would be indexed out of range */
+			pr_err("Too many local NUMA nodes. OBMM rebuild required.\n");
+			return -EOPNOTSUPP;
+		}
+		local_cnt++;
+	}
+
+	memset(mem_allocators, 0, sizeof(struct mem_allocator) * OBMM_MAX_LOCAL_NUMA_NODES);
+	if (mempool_size)
+		ret = mempool_size_parse(mempool_size, local_cnt);
+
+	if (ret) {
+		pr_err("memory allocator init failed.\n");
+		return ret;
+	}
+
+	aid = select_mem_allocator();
+	if (aid == ALLOCATOR_MAX)
+		return -EINVAL;
+
+	ret = init_mem_allocator_granu(aid);
+	if (ret)
+		return ret;
+
+	for_each_online_local_node(i) {
+		ret = mem_allocator_init_one(i, aid);
+		if (ret)
+			goto failed;
+	}
+
+	ghes_register_report_chain(&ghes_mem_ras_notifier_block);
+
+	return 0;
+
+failed:
+	for_each_online_local_node(j) {
+		if (j < i)
+			mem_allocator_uninit_one(j);
+	}
+
+	return ret;
+}
+
+void ubmempool_allocator_exit(void)
+{
+	int i;
+
+	ghes_unregister_report_chain(&ghes_mem_ras_notifier_block);
+
+	for_each_online_local_node(i) {
+		if (!mem_allocators[i].allocator.initialized)
+			continue;
+
+		mem_allocator_uninit_one(i);
+	}
+}
diff --git a/drivers/ub/obmm/ubmempool_allocator.h b/drivers/ub/obmm/ubmempool_allocator.h
new file mode 100644
index 000000000000..7c17eb7cf85c
--- /dev/null
+++ b/drivers/ub/obmm/ubmempool_allocator.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * Description:OBMM Framework's implementations.
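+ *
+ * Minimal usage sketch (illustrative only; it relies on the declarations
+ * below with zero = true and allow_slow = true, and assumes the usual
+ * 0-on-success return convention):
+ *
+ *	uint64_t sizes[1] = { OBMM_MEMSEG_SIZE };
+ *	struct mem_description_pool desc;
+ *
+ *	if (!ubmempool_allocator_init()) {
+ *		if (!allocate_memory_contiguous(sizes, 1, &desc, true, true))
+ *			free_memory_contiguous(&desc);
+ *		ubmempool_allocator_exit();
+ *	}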
+ */ +#ifndef UBMEMPOOL_ALLOCATOR_H +#define UBMEMPOOL_ALLOCATOR_H + +#include "obmm_core.h" + +void free_memory_contiguous(struct mem_description_pool *desc); + +int allocate_memory_contiguous(uint64_t size[], int length, struct mem_description_pool *desc, + bool zero, bool allow_slow); + +size_t ubmempool_contract(int nid, bool is_hugepage); + +int ubmempool_allocator_init(void); +void ubmempool_allocator_exit(void); + +#endif -- Gitee From c51a536782d5709b5337246e1d3d885de1c473eb Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:40 +0800 Subject: [PATCH 13/48] obmm: Add memory region export functionality commit 64c6ae6a06e840c6c26f510acfbb8de3943a611d openEuler Implement the core export functionality for OBMM memory regions, enabling memory sharing between different processes and hosts. Key features include: 1. Added user-space interfaces (OBMM_CMD_EXPORT, OBMM_CMD_UNEXPORT) to export or reclaim memory regions. Exported memory becomes inaccessible from the local kernel space and allows access from remote servers within the same supernode. 2. Defined the core data structure 'obmm_export_region' to manage exported memory regions with associated metadata and permissions. 3. Implemented essential functions for export region operations: - Cache maintenance operations - Kernel page table attribute modifications - Memory mapping support - Address translation functions 4. Added necessary flag definitions (OBMM_EXPORT_FLAG_ALLOW_MMAP, OBMM_EXPORT_FLAG_FAST) to control export behavior and permissions. 5. Created new header file (obmm_export_region_ops.h) to declare public functions This enables the primary use case of OBMM: secure and efficient memory sharing across system boundaries. Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 3 +- drivers/ub/obmm/obmm_cache.c | 31 +- drivers/ub/obmm/obmm_core.c | 70 ++++- drivers/ub/obmm/obmm_core.h | 59 +++- drivers/ub/obmm/obmm_export.c | 267 +++++++++++++++++ drivers/ub/obmm/obmm_export.h | 21 ++ drivers/ub/obmm/obmm_export_from_pool.c | 349 +++++++++++++++++++++++ drivers/ub/obmm/obmm_export_region_ops.c | 319 +++++++++++++++++++++ drivers/ub/obmm/obmm_export_region_ops.h | 23 ++ include/uapi/ub/obmm.h | 35 +++ 10 files changed, 1172 insertions(+), 5 deletions(-) create mode 100644 drivers/ub/obmm/obmm_export.c create mode 100644 drivers/ub/obmm/obmm_export.h create mode 100644 drivers/ub/obmm/obmm_export_from_pool.c create mode 100644 drivers/ub/obmm/obmm_export_region_ops.c create mode 100644 drivers/ub/obmm/obmm_export_region_ops.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 4d5aff8a5115..01689289ccec 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -4,6 +4,7 @@ obmm-y := obmm_core.o \ obmm_cache.o \ conti_mem_allocator.o \ - ubmempool_allocator.o + obmm_export.o obmm_export_from_pool.o \ + ubmempool_allocator.o obmm_export_region_ops.o obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/obmm_cache.c b/drivers/ub/obmm/obmm_cache.c index 5942f2f16e2a..53f0840a749b 100644 --- a/drivers/ub/obmm/obmm_cache.c +++ b/drivers/ub/obmm/obmm_cache.c @@ -13,6 +13,7 @@ #include #include "obmm_core.h" +#include "obmm_export_region_ops.h" #include "obmm_cache.h" static bool skip_cache_maintain; @@ -120,7 +121,35 @@ int flush_cache_by_pa(phys_addr_t addr, size_t size, unsigned long cache_ops) int obmm_region_flush_range(struct obmm_region *reg, unsigned long offset, unsigned long length, uint8_t cache_ops) { - return -ENOTTY; + int 
ret; + struct obmm_export_region *e_reg; + + /* validation */ + if (!is_valid_cache_ops(cache_ops)) { + pr_err("invalid cache operation %u\n", cache_ops); + return -EINVAL; + } + if (offset >= reg->mem_size || length > reg->mem_size - offset || + !IS_ALIGNED(offset, PAGE_SIZE) || !IS_ALIGNED(length, PAGE_SIZE)) { + pr_err("invalid flush range for region=%d: offset=0x%lx, flush_length=0x%lx, region_length=0x%llx\n", + reg->regionid, offset, length, reg->mem_size); + return -EINVAL; + } + + if (cache_ops == OBMM_SHM_CACHE_NONE) + return 0; + pr_debug("flush cache: region=%d, offset=0x%lx, length=0x%lx, cache_ops=%u\n", + reg->regionid, offset, length, cache_ops); + /* clear cache and ubus queue */ + e_reg = container_of(reg, struct obmm_export_region, region); + ret = flush_export_region(e_reg, offset, length, cache_ops); + + if (ret) + pr_err("flush failed: region=%d, offset=0x%lx, length=0x%lx, cache_ops=%u\n", + reg->regionid, offset, length, cache_ops); + else + pr_debug("cache successfully flushed.\n"); + return ret; } /* flush the entire process address space */ diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index f7b30422c32a..74fa10539a70 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -21,7 +21,9 @@ #include #include "obmm_cache.h" +#include "obmm_export_region_ops.h" #include "ubmempool_allocator.h" +#include "obmm_export.h" #include "obmm_core.h" size_t __obmm_memseg_size; @@ -144,12 +146,45 @@ struct obmm_region *search_deactivate_obmm_region(int regionid) int obmm_query_by_offset(struct obmm_region *reg, unsigned long offset, struct obmm_ext_addr *ext_addr) { - return -ENOTTY; + int ret; + struct obmm_export_region *e_reg; + + if (reg->type == OBMM_EXPORT_REGION) { + e_reg = container_of(reg, struct obmm_export_region, region); + ret = get_offset_detail_export_region(e_reg, + offset, ext_addr); + } + + return ret; } int obmm_query_by_pa(unsigned long pa, struct obmm_ext_addr *ext_addr) { - return -ENOTTY; + int ret = -ENOENT; + struct obmm_region *region; + unsigned long flags; + spinlock_t *lock; + + lock = &g_obmm_ctx_info.lock; + + spin_lock_irqsave(lock, flags); + list_for_each_entry(region, &g_obmm_ctx_info.regions, node) { + if (region->type == OBMM_EXPORT_REGION) { + struct obmm_export_region *e_reg; + + e_reg = container_of(region, struct obmm_export_region, + region); + ret = get_pa_detail_export_region(e_reg, pa, ext_addr); + } + + if (ret == 0) + break; + } + spin_unlock_irqrestore(lock, flags); + + if (ret) + return -ENOENT; + return 0; } static int nid_to_package_id(int nid) @@ -356,10 +391,41 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, { int ret; union { + struct obmm_cmd_export create; + struct obmm_cmd_unexport unexport; struct obmm_cmd_addr_query query; } cmd_param; switch (cmd) { + case OBMM_CMD_EXPORT: { + ret = (int)copy_from_user(&cmd_param.create, (void __user *)arg, + sizeof(struct obmm_cmd_export)); + if (ret) { + pr_err("failed to load export argument"); + return -EFAULT; + } + + ret = obmm_export_from_pool(&cmd_param.create); + if (ret) + return ret; + + ret = (int)copy_to_user((void __user *)arg, &cmd_param.create, + sizeof(struct obmm_cmd_export)); + if (ret) { + pr_err("failed to write export result"); + return -EFAULT; + } + } break; + case OBMM_CMD_UNEXPORT: { + ret = (int)copy_from_user(&cmd_param.unexport, (void __user *)arg, + sizeof(struct obmm_cmd_unexport)); + if (ret) { + pr_err("failed to load unexport argument"); + return -EFAULT; + } + + ret = 
obmm_unexport(&cmd_param.unexport); + } break; case OBMM_CMD_ADDR_QUERY: { ret = (int)copy_from_user(&cmd_param.query, (void __user *)arg, sizeof(struct obmm_cmd_addr_query)); diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index 8e9e128a436c..f00797a3d99f 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -44,6 +44,12 @@ enum obmm_region_type { OBMM_EXPORT_REGION, }; +enum obmm_mmap_granu { + OBMM_MMAP_GRANU_NONE, + OBMM_MMAP_GRANU_PAGE, + OBMM_MMAP_GRANU_PMD +}; + #define OBMM_REGION_FLAG_NUMA_REMOTE 0x1 #define OBMM_REGION_FLAG_ALLOW_MMAP 0x2 #define OBMM_REGION_FLAG_MEMORY_FROM_USER 0x4 @@ -54,6 +60,9 @@ enum obmm_region_type { #define OBMM_MAX_VALID_REGIONID MINORMASK #define OBMM_REGIONID_MAX_COUNT (OBMM_MAX_VALID_REGIONID - OBMM_MIN_VALID_REGIONID + 1) +#define OBMM_MEM_ALLOW_CACHEABLE_MMAP 0x1 +#define OBMM_MEM_ALLOW_NONCACHEABLE_MMAP 0x2 + /* invalidate cache **on start-up** */ /* region models a set of memory to share across hosts: a unit of sharing. */ struct obmm_region { @@ -68,7 +77,18 @@ struct obmm_region { /* the total size of all memory segments included in meminfo */ u64 mem_size; - + /* + * the granularity of memory mapping, initially OBMM_MMAP_GRANU_NONE. + * When users call mmap, the mmap granularity is determined based on + * the mmap flags and OBMM_REGION_FLAG_ALLOW_MMAP. + */ + enum obmm_mmap_granu mmap_granu; + /* + * Determines what mode the memory can be mapped with. + * OBMM_MEM_ALLOW_CACHEABLE_MMAP: Supports cacheable mapping + * OBMM_MEM_ALLOW_NONCACHEABLE_MMAP: Supports non-cacheable mapping + */ + unsigned long mem_cap; /* regions are chained into a list for management */ struct list_head node; @@ -76,10 +96,47 @@ struct obmm_region { unsigned char priv[OBMM_MAX_PRIV_LEN]; }; +static inline bool region_allow_mmap(const struct obmm_region *reg) +{ + return reg->flags & OBMM_REGION_FLAG_ALLOW_MMAP; +} +static inline bool region_fast_alloc(const struct obmm_region *reg) +{ + return reg->flags & OBMM_REGION_FLAG_FAST_ALLOC; +} + struct mem_description_pool { struct list_head head[OBMM_MAX_LOCAL_NUMA_NODES]; }; +struct obmm_export_region { + struct obmm_region region; + + /* export region may use physical memory from NUMA node[0] to node[node_count-1] */ + unsigned int node_count; + uint64_t node_mem_size[OBMM_MAX_LOCAL_NUMA_NODES]; + + /* physical pages */ + union { + struct mem_description_pool mem_desc; + }; + + /* DMA mapping */ + struct sg_table sgt; + + /* UMMU device for the tokenid */ + struct device *ummu_dev; + /* UMMU RAS event notifier */ + struct ummu_event_block *ummu_event_block; + + unsigned int tokenid; + u64 uba; + unsigned int vendor_len; + void *vendor_info; + int affinity; + u8 deid[16]; +}; + struct obmm_ctx_info { /* active */ struct list_head regions; diff --git a/drivers/ub/obmm/obmm_export.c b/drivers/ub/obmm/obmm_export.c new file mode 100644 index 000000000000..009020d2e2dd --- /dev/null +++ b/drivers/ub/obmm/obmm_export.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description:OBMM Framework's implementations. 
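+ *
+ * Export/unexport lifecycle implemented in this file (informational summary):
+ *	obmm_export_common()   = alloc_export_memory() + setup_ummu()
+ *	obmm_unexport_common() = teardown_ummu() + free_export_memory()
+ * setup_ummu() obtains a token device from ummu_core_alloc_tdev(), DMA-maps
+ * e_reg->sgt and records sg_dma_address() of the first entry as the UBA.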
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include "conti_mem_allocator.h"
+#include "ubmempool_allocator.h"
+#include "obmm_core.h"
+#include "obmm_cache.h"
+#include "obmm_export.h"
+
+int export_flags_to_region_flags(unsigned long *region_flags, unsigned long user_flags)
+{
+	*region_flags = 0;
+
+	if (user_flags & (~OBMM_EXPORT_FLAG_MASK))
+		return -EINVAL;
+	if (user_flags & OBMM_EXPORT_FLAG_ALLOW_MMAP)
+		*region_flags |= OBMM_REGION_FLAG_ALLOW_MMAP;
+	if (user_flags & OBMM_EXPORT_FLAG_FAST)
+		*region_flags |= OBMM_REGION_FLAG_FAST_ALLOC;
+
+	return 0;
+}
+
+static int fill_ummu_info(struct tdev_attr *attr, struct obmm_export_region *e_reg)
+{
+	tdev_attr_init(attr);
+	attr->name = (char *)"OBMM_TDEV";
+	if (e_reg->vendor_len > 0) {
+		attr->priv = kmemdup(e_reg->vendor_info, e_reg->vendor_len, GFP_KERNEL);
+		if (!attr->priv)
+			return -ENOMEM;
+	}
+	attr->priv_len = e_reg->vendor_len;
+	return 0;
+}
+
+static void drain_ummu_info(struct tdev_attr *attr)
+{
+	kfree(attr->priv);
+}
+
+static int setup_ummu(struct obmm_export_region *e_reg)
+{
+	struct tdev_attr attr;
+	uint32_t tokenid = UMMU_INVALID_TID;
+	int retval;
+
+	retval = fill_ummu_info(&attr, e_reg);
+	if (retval)
+		return retval;
+
+	/* register the memory region through UMMU */
+	pr_info("call ummu_core_alloc_tdev(), priv_len=%u\n", attr.priv_len);
+	e_reg->ummu_dev = ummu_core_alloc_tdev(&attr, &tokenid);
+	if (e_reg->ummu_dev == NULL) {
+		pr_err("Failed to create UMMU device\n");
+		retval = -EPERM;
+		goto out_drain_info;
+	}
+	e_reg->tokenid = tokenid;
+	pr_debug("ummu_core_alloc_tdev() returned ummu_dev: tid=%u, name=%s\n", tokenid,
+		 dev_name(e_reg->ummu_dev));
+
+	/* DMA mapping */
+	pr_info("call dma_map_sgtable(..., dir=DMA_BIDIRECTIONAL, attrs=0)\n");
+	retval = dma_map_sgtable(e_reg->ummu_dev, &e_reg->sgt, DMA_BIDIRECTIONAL, 0);
+	if (retval) {
+		pr_err("Failed to map sgtable on UMMU. ret=%pe\n", ERR_PTR(retval));
+		goto out_free_device;
+	}
+	pr_debug("dma_map_sgtable returned 0\n");
+
+	e_reg->uba = sg_dma_address(e_reg->sgt.sgl);
+	drain_ummu_info(&attr);
+	return 0;
+
+out_free_device:
+	if (ummu_core_free_tdev(e_reg->ummu_dev))
+		pr_warn("Failed to create memory region but unable to cleanup allocated UMMU device\n");
+out_drain_info:
+	drain_ummu_info(&attr);
+	return retval;
+}
+
+static int teardown_ummu(struct obmm_export_region *e_reg)
+{
+	int ret, rollback_ret;
+
+	pr_debug("call external: dma_unmap_sgtable\n");
+	dma_unmap_sgtable(e_reg->ummu_dev, &e_reg->sgt, DMA_BIDIRECTIONAL, 0);
+
+	pr_debug("call external: ummu_core_free_tdev()\n");
+	ret = ummu_core_free_tdev(e_reg->ummu_dev);
+	if (ret) {
+		pr_err("Failed to free UMMU tdev, ret=%pe.\n", ERR_PTR(ret));
+		goto err_free_tdev;
+	}
+
+	return 0;
+
+err_free_tdev:
+	/* best-effort rollback: restore the DMA mapping torn down above */
+	rollback_ret = dma_map_sgtable(e_reg->ummu_dev, &e_reg->sgt, DMA_BIDIRECTIONAL, 0);
+	if (rollback_ret) {
+		pr_err("Failed to map sgtable on UMMU. ret=%pe\n", ERR_PTR(rollback_ret));
+		ret = -ENOTRECOVERABLE;
+	}
+	if (e_reg->uba != sg_dma_address(e_reg->sgt.sgl)) {
+		pr_err("Tried remapping in UMMU on rollback but UBA changed.\n");
+		ret = -ENOTRECOVERABLE;
+		pr_debug("call external: dma_unmap_sgtable\n");
+		dma_unmap_sgtable(e_reg->ummu_dev, &e_reg->sgt, DMA_BIDIRECTIONAL, 0);
+	}
+	return ret;
+}
+
+/* Make sure the memory to be exported is properly allocated and ready to be mapped by UMMU.
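+ * (For a pool-backed region this means memsegs drawn from the per-node
+ * contiguous mempool by alloc_export_memory_pool().)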
+ * The detailed information of the memory should be put in place in e_reg->sgt + */ +static int alloc_export_memory(struct obmm_export_region *e_reg) +{ + return alloc_export_memory_pool(e_reg); +} + +static void free_export_memory_pool(struct obmm_export_region *e_reg) +{ + sg_free_table(&e_reg->sgt); + free_memory_contiguous(&e_reg->mem_desc); +} + +static void free_export_memory(struct obmm_export_region *e_reg) +{ + free_export_memory_pool(e_reg); +} + +/* Ensure all user inputs are properly converted and filled into the region. */ +int obmm_export_common(struct obmm_export_region *e_reg) +{ + int ret; + + ret = alloc_export_memory(e_reg); + if (ret) + return ret; + + ret = setup_ummu(e_reg); + if (ret) + goto free_memory; + + return 0; + +free_memory: + free_export_memory(e_reg); + + return ret; +} + +int obmm_unexport_common(struct obmm_export_region *e_reg) +{ + int ret; + + ret = teardown_ummu(e_reg); + if (ret) + return ret; + free_export_memory(e_reg); + + return 0; +} + +/* NOTE: the operation order is not precisely the reverse order of initialization for the ease of + * error rollback. Please make careful evaluation on modifications. + */ +int obmm_unexport(const struct obmm_cmd_unexport *cmd_unexport) +{ + int ret; + struct obmm_region *reg; + struct obmm_export_region *e_reg; + + pr_info("%s: mem_id=%llu, flags=%#llx.\n", __func__, cmd_unexport->mem_id, + cmd_unexport->flags); + if (!validate_obmm_mem_id(cmd_unexport->mem_id)) + return -ENOENT; + if (cmd_unexport->flags & (~OBMM_UNEXPORT_FLAG_MASK)) { + pr_err("%s: invalid flags %#llx.\n", __func__, cmd_unexport->flags); + return -EINVAL; + } + + reg = search_deactivate_obmm_region(cmd_unexport->mem_id); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + if (reg->type != OBMM_EXPORT_REGION) { + pr_err("%s: mem_id=%llu region type mismatched.\n", __func__, cmd_unexport->mem_id); + ret = -EINVAL; + goto err_unexport_common; + } + + e_reg = container_of(reg, struct obmm_export_region, region); + ret = obmm_unexport_common(e_reg); + if (ret) + goto err_unexport_common; + + deregister_obmm_region(reg); + uninit_obmm_region(reg); + free_export_region(e_reg); + + pr_info("%s: mem_id=%llu completed.\n", __func__, cmd_unexport->mem_id); + return 0; + +err_unexport_common: + activate_obmm_region(reg); + pr_err("%s: mem_id=%llu failed, %pe.\n", __func__, cmd_unexport->mem_id, ERR_PTR(ret)); + + return ret; +} + +int set_export_vendor(struct obmm_export_region *e_reg, const void __user *vendor_info, + unsigned int vendor_len) +{ + if (vendor_len == 0) { + e_reg->vendor_info = NULL; + e_reg->vendor_len = vendor_len; + return 0; + } + if (vendor_len > OBMM_MAX_VENDOR_LEN) { + pr_err("invalid vendor_len = 0x%x, should less than 0x%x", vendor_len, + OBMM_MAX_VENDOR_LEN); + return -EINVAL; + } + e_reg->vendor_info = kmalloc(vendor_len, GFP_KERNEL); + if (!e_reg->vendor_info) + return -ENOMEM; + + if (copy_from_user(e_reg->vendor_info, vendor_info, vendor_len)) { + kfree(e_reg->vendor_info); + e_reg->vendor_info = NULL; + pr_err("failed to save vendor data.\n"); + return -EFAULT; + } + e_reg->vendor_len = vendor_len; + return 0; +} + +void free_export_region(struct obmm_export_region *e_reg) +{ + if (e_reg->vendor_len) + kfree(e_reg->vendor_info); + + kfree(e_reg); +} diff --git a/drivers/ub/obmm/obmm_export.h b/drivers/ub/obmm/obmm_export.h new file mode 100644 index 000000000000..1ced6d14569a --- /dev/null +++ b/drivers/ub/obmm/obmm_export.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., 
Ltd. 2025. All rights reserved.
+ * Description:OBMM Framework's implementations.
+ */
+
+#ifndef OBMM_EXPORT_C_H
+#define OBMM_EXPORT_C_H
+int obmm_export_common(struct obmm_export_region *e_reg);
+
+int export_flags_to_region_flags(unsigned long *region_flags, unsigned long user_flags);
+
+int alloc_export_memory_pool(struct obmm_export_region *e_reg);
+int obmm_unexport_common(struct obmm_export_region *e_reg);
+int obmm_export_from_pool(struct obmm_cmd_export *cmd_export);
+int obmm_unexport(const struct obmm_cmd_unexport *cmd_unexport);
+
+int set_export_vendor(struct obmm_export_region *e_reg, const void __user *vendor_info,
+		      unsigned int vendor_len);
+void free_export_region(struct obmm_export_region *e_reg);
+#endif
diff --git a/drivers/ub/obmm/obmm_export_from_pool.c b/drivers/ub/obmm/obmm_export_from_pool.c
new file mode 100644
index 000000000000..d2b091bc0724
--- /dev/null
+++ b/drivers/ub/obmm/obmm_export_from_pool.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * Description:OBMM Framework's implementations.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ubmempool_allocator.h"
+#include "conti_mem_allocator.h"
+#include "obmm_export.h"
+
+/* An SGL entry size is specified as an unsigned int, so limit a single SGL
+ * entry to no more than (1 << MAX_CHUNK_SHIFT) bytes.
+ */
+#define MAX_CHUNK_SHIFT (31)
+#define MAX_CHUNK_SIZE (1U << MAX_CHUNK_SHIFT)
+#define MAX_CHUNK_MASK (MAX_CHUNK_SIZE - 1)
+
+static unsigned long size_to_chunk_count(size_t size)
+{
+	return (size >> MAX_CHUNK_SHIFT) + (unsigned long)((size & MAX_CHUNK_MASK) != 0);
+}
+
+static unsigned long memseg_list_to_chunk_count(struct list_head *head)
+{
+	struct memseg_node *node;
+	phys_addr_t start = 0, end = 0;
+	unsigned long chunk_count = 0;
+
+	list_for_each_entry(node, head, list) {
+		/* whether the new node follows previous ones */
+		if (end == node->addr) {
+			end += OBMM_MEMSEG_SIZE;
+			continue;
+		}
+		chunk_count += size_to_chunk_count(end - start);
+
+		start = node->addr;
+		end = node->addr + OBMM_MEMSEG_SIZE;
+	}
+	chunk_count += size_to_chunk_count(end - start);
+	return chunk_count;
+}
+
+static struct scatterlist *fill_sg_chunks(struct scatterlist *s, phys_addr_t start, size_t size,
+					  unsigned long *filled_chunks)
+{
+	size_t chunk_size;
+	unsigned long num_chunks_to_fill;
+
+	*filled_chunks = 0;
+	num_chunks_to_fill = size_to_chunk_count(size);
+	while (num_chunks_to_fill--) {
+		if (s == NULL) {
+			/* this error is not expected to show up in a release
+			 * version, thus proper error handling is not included
+			 */
+			pr_warn_once("bug: scatterlist is not big enough.\n");
+			return s;
+		}
+		chunk_size = size > MAX_CHUNK_SIZE ? MAX_CHUNK_SIZE : size;
+		sg_set_page(s, pfn_to_page(start >> PAGE_SHIFT), chunk_size, 0);
+		s = sg_next(s);
+
+		start += chunk_size;
+		size -= chunk_size;
+		*filled_chunks += 1;
+	}
+	return s;
+}
+
+/* Fill scatterlist entries for every physically contiguous run of memsegs in
+ * @head: each run is split into chunks of at most MAX_CHUNK_SIZE, the number
+ * of entries consumed is reported via @filled_chunks, and the next unfilled
+ * entry is returned.
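+ * For example, with MAX_CHUNK_SIZE = 2 GiB, a single 5 GiB contiguous run
+ * consumes three entries: 2 GiB + 2 GiB + 1 GiB.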
+ */ +static struct scatterlist *fill_sg_list(struct scatterlist *s, struct list_head *head, + unsigned long *filled_chunks) +{ + struct memseg_node *node; + phys_addr_t start = 0, end = 0; + unsigned long chunk_count; + + *filled_chunks = 0; + list_for_each_entry(node, head, list) { + /* whether the new node follows previous ones */ + if (end == node->addr) { + end += OBMM_MEMSEG_SIZE; + continue; + } + + if (end != 0) { + s = fill_sg_chunks(s, start, end - start, &chunk_count); + *filled_chunks += chunk_count; + } + + /* track the first piece of new chunk */ + start = node->addr; + end = node->addr + OBMM_MEMSEG_SIZE; + } + + if (end != 0) { + s = fill_sg_chunks(s, start, end - start, &chunk_count); + *filled_chunks += chunk_count; + } + + return s; +} + +static int sg_alloc_table_from_memdesc(struct sg_table *sgt, struct mem_description_pool *desc, + gfp_t gfp_mask) +{ + unsigned long chunk_count, total_chunks, filled_chunks; + struct scatterlist *s; + int ret, i; + + total_chunks = 0; + for (i = 0; i < OBMM_MAX_LOCAL_NUMA_NODES; i++) + total_chunks += memseg_list_to_chunk_count(&desc->head[i]); + if (total_chunks == 0) { + pr_err("%s: no memory.\n", __func__); + return -EINVAL; + } + + ret = sg_alloc_table(sgt, total_chunks, gfp_mask); + if (ret) { + pr_err("alloc sgt failed.\n"); + return ret; + } + + s = sgt->sgl; + filled_chunks = 0; + for (i = 0; i < OBMM_MAX_LOCAL_NUMA_NODES; i++) { + s = fill_sg_list(s, &desc->head[i], &chunk_count); + filled_chunks += chunk_count; + } + + if (filled_chunks != total_chunks || s != NULL) { + pr_err("%s: internal error.\n", __func__); + ret = -ENOTRECOVERABLE; + goto sg_err; + } + return 0; + +sg_err: + sg_free_table(sgt); + + return ret; +} + +int alloc_export_memory_pool(struct obmm_export_region *e_reg) +{ + int ret; + unsigned int i; + struct mem_description_pool *desc; + bool allow_slow = !region_fast_alloc(&e_reg->region); + + for (i = 0; i < e_reg->node_count; i++) { + if (e_reg->node_mem_size[i] == 0) + continue; + if (e_reg->node_mem_size[i] % OBMM_MEMSEG_SIZE) { + pr_err("invalid size 0x%llx on node %d: not aligned to mempool granu %#lx\n", + e_reg->node_mem_size[i], i, OBMM_MEMSEG_SIZE); + return -EINVAL; + } + } + + pr_debug("export_from_pool: allocation started.\n"); + desc = &e_reg->mem_desc; + ret = allocate_memory_contiguous(e_reg->node_mem_size, e_reg->node_count, desc, true, + allow_slow); + if (ret) + return ret; + pr_debug("export_from_pool: allocation completed. 
sgtable preparation started.\n"); + + ret = sg_alloc_table_from_memdesc(&e_reg->sgt, desc, GFP_KERNEL); + if (ret) { + free_memory_contiguous(desc); + return ret; + } + pr_debug("export_from_pool: sgtable preparation completed.\n"); + + return 0; +} + +static int calculate_export_region_size(unsigned long *total_size, + struct obmm_cmd_export *cmd_export) +{ + uint64_t i; + nodemask_t nodes = NODE_MASK_NONE; + + if (cmd_export->length > OBMM_MAX_LOCAL_NUMA_NODES) { + pr_err("Size list is too long: max=%d, actual_length=%lld\n", + OBMM_MAX_LOCAL_NUMA_NODES, cmd_export->length); + return -E2BIG; + } + if (cmd_export->pxm_numa > OBMM_MAX_LOCAL_NUMA_NODES) { + pr_err("Invalid pxm_numa %d\n", cmd_export->pxm_numa); + return -EINVAL; + } + + *total_size = 0; + for (i = 0; i < cmd_export->length; i++) { + if (!IS_ALIGNED(cmd_export->size[i], OBMM_MEMSEG_SIZE)) { + pr_err("The size of new OBMM region 0x%llx on node %d is not aligned to OBMM memseg size %#lx.\n", + cmd_export->size[i], (int)i, OBMM_MEMSEG_SIZE); + return -EINVAL; + } + if (cmd_export->size[i] != 0 && !is_online_local_node(i)) { + pr_err("Cannot export memory from offlined or remote numa node %d\n", + (int)i); + return -ENODEV; + } + if (cmd_export->size[i] != 0) { + if (*total_size > *total_size + cmd_export->size[i]) { + pr_err("Memory size overflowed!\n"); + return -EOVERFLOW; + } + *total_size += cmd_export->size[i]; + node_set(i, nodes); + } + } + if (*total_size == 0) { + pr_err("The size of new OBMM region is 0. Non-zero value expected\n"); + return -EINVAL; + } + node_set(cmd_export->pxm_numa, nodes); + if (!nodes_on_same_package(&nodes)) { + pr_err("Cannot use memory from multiple sockets.\n"); + return -EINVAL; + } + + return 0; +} + +static struct obmm_export_region *alloc_region_from_cmd(struct obmm_cmd_export *cmd_export) +{ + struct obmm_export_region *e_reg; + unsigned long total_size; + int ret; + + ret = calculate_export_region_size(&total_size, cmd_export); + if (ret) + return ERR_PTR(ret); + + e_reg = kzalloc(sizeof(struct obmm_export_region), GFP_KERNEL); + if (e_reg == NULL) + return ERR_PTR(-ENOMEM); + + e_reg->region.type = OBMM_EXPORT_REGION; + e_reg->region.mem_size = total_size; + e_reg->region.mem_cap = OBMM_MEM_ALLOW_CACHEABLE_MMAP | OBMM_MEM_ALLOW_NONCACHEABLE_MMAP; + e_reg->affinity = cmd_export->pxm_numa; + memcpy(e_reg->deid, cmd_export->deid, sizeof(e_reg->deid)); + ret = export_flags_to_region_flags(&e_reg->region.flags, cmd_export->flags); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + e_reg->node_count = cmd_export->length; + memcpy(e_reg->node_mem_size, cmd_export->size, sizeof(uint64_t) * e_reg->node_count); + /* compaction */ + while (e_reg->node_count - 1 > 0 && e_reg->node_mem_size[e_reg->node_count - 1] == 0) + e_reg->node_count--; + ret = set_obmm_region_priv(&e_reg->region, cmd_export->priv_len, cmd_export->priv); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + ret = set_export_vendor(e_reg, cmd_export->vendor_info, cmd_export->vendor_len); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + return e_reg; +} + +static void print_export_param(const struct obmm_cmd_export *cmd_export) +{ + unsigned int i; + + pr_info("obmm_export: len(sizes)=%#llx sizes={", cmd_export->length); + for (i = 0; i < cmd_export->length && i < OBMM_MAX_LOCAL_NUMA_NODES; i++) + if (cmd_export->size[i]) + pr_cont(" [%u]:%#llx", i, cmd_export->size[i]); + if (i < cmd_export->length) + pr_cont(" ..."); + + pr_cont(" } flags=%#llx deid=" EID_FMT64 " priv_len=%u\n", cmd_export->flags, + 
EID_ARGS64_H(cmd_export->deid), EID_ARGS64_L(cmd_export->deid), + cmd_export->priv_len); +} + +/* obmm_export_from_pool: create an OBMM-exported memory region. The region is + * physically located on this host and can be accessed from remote host. + * In OBMM's terminology, it is an export region. + */ +int obmm_export_from_pool(struct obmm_cmd_export *cmd_export) +{ + struct obmm_export_region *e_reg; + uint64_t uba, mem_id; + uint32_t token_id; + int ret; + + print_export_param(cmd_export); + e_reg = alloc_region_from_cmd(cmd_export); + if (IS_ERR(e_reg)) + return PTR_ERR(e_reg); + + ret = init_obmm_region(&e_reg->region); + if (ret) + goto out_free_reg; + + ret = obmm_export_common(e_reg); + if (ret) + goto out_unit_reg; + + token_id = e_reg->tokenid; + uba = e_reg->uba; + mem_id = (uint64_t)e_reg->region.regionid; + + ret = register_obmm_region(&e_reg->region); + if (ret) + goto out_unexport; + activate_obmm_region(&e_reg->region); + + cmd_export->tokenid = token_id; + cmd_export->uba = uba; + cmd_export->mem_id = mem_id; + + pr_info("obmm_export: mem_id=%llu online.\n", mem_id); + return 0; + +out_unexport: + obmm_unexport_common(e_reg); +out_unit_reg: + uninit_obmm_region(&e_reg->region); +out_free_reg: + free_export_region(e_reg); + return ret; +} diff --git a/drivers/ub/obmm/obmm_export_region_ops.c b/drivers/ub/obmm/obmm_export_region_ops.c new file mode 100644 index 000000000000..3b9cdd6b7dac --- /dev/null +++ b/drivers/ub/obmm/obmm_export_region_ops.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include "conti_mem_allocator.h" +#include "ubmempool_allocator.h" +#include "obmm_core.h" +#include "obmm_cache.h" +#include "obmm_export_region_ops.h" + +/** + * walk_fn for one obmm_export_region. + * @start: start physical address. + * @end: end physical address. + * @offset: offset of `start` address in this obmm_export_region. + * @arg: private argument. + */ +typedef int (*walk_fn)(phys_addr_t start, phys_addr_t end, unsigned long offset, void *arg); + +/* + * this function stops if walk_fn returns an error. 
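+ *
+ * Example (illustrative): with two sg entries of 0x200000 bytes each at
+ * physical addresses pa0 and pa1,
+ *	walk_export_memory(reg, 0x100000, 0x200000, fn, arg)
+ * invokes
+ *	fn(pa0 + 0x100000, pa0 + 0x1fffff, 0x100000, arg)
+ *	fn(pa1, pa1 + 0xfffff, 0x200000, arg)
+ * where the third argument is each piece's offset from the region head.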
+ */ +static void walk_export_memory(const struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, walk_fn fn, void *arg) +{ + struct scatterlist *sg; + /* the offset of current sg from the region's beginning */ + unsigned long offset_from_head = 0; + unsigned int i; + int ret = 0; + + if (length == 0) + return; + + for_each_sgtable_sg(&e_reg->sgt, sg, i) { + phys_addr_t start, size; + + if (offset >= sg->length) { + offset -= sg->length; + offset_from_head += sg->length; + continue; + } + + /* now the offset is an intra-sg offset */ + start = page_to_phys(sg_page(sg)) + offset; + size = min(sg->length, offset + length) - offset; + + ret = fn(start, start + size - 1, offset_from_head + offset, arg); + if (ret) + return; + + offset = 0; + length -= size; + if (length == 0) + break; + + offset_from_head += sg->length; + } + + if (ret == 0 && length != 0) + pr_warn("%s: excessive length: %#lx bytes not walked.\n", __func__, length); +} + +struct flush_cache_info { + int ret; + int succ_flush_count; + int total_flush_count; + unsigned long cache_ops; +}; + +static int flush_cache_call(phys_addr_t start, phys_addr_t end, + unsigned long offset __always_unused, void *arg) +{ + struct flush_cache_info *info = (struct flush_cache_info *)arg; + + int ret = flush_cache_by_pa(start, end - start + 1, info->cache_ops); + + if (ret == 0) + info->succ_flush_count++; + else if (info->ret == 0) + info->ret = ret; + else if (info->ret != ret) + pr_warn("multiple flush error types detected: new flush_ret=%pe.\n", ERR_PTR(ret)); + info->total_flush_count++; + + return 0; +} + +int flush_export_region(struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, unsigned long cache_ops) +{ + struct flush_cache_info info = { 0 }; + + info.cache_ops = cache_ops; + walk_export_memory(e_reg, offset, length, flush_cache_call, &info); + + pr_debug("export region flushed: flush_offset:0x%lx, flush_len:0x%lx, cache_ops:%lu, flush_round:%d, flush_succ_round:%d, flush_retval:%pe.\n", + offset, length, cache_ops, info.total_flush_count, info.succ_flush_count, + ERR_PTR(info.ret)); + return info.ret; +} + +struct kernel_pgtable_info { + int ret; + bool set_inval; +}; + +static int kernel_pgtable_invalid_call(phys_addr_t start, phys_addr_t end, + unsigned long offset __always_unused, void *arg) +{ + struct kernel_pgtable_info *info = (struct kernel_pgtable_info *)arg; + int ret; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long end_pfn = (end + 1) >> PAGE_SHIFT; + + pr_debug("call external: set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_inval=%d)\n", + start_pfn, end_pfn, info->set_inval); + ret = set_linear_mapping_invalid(start_pfn, end_pfn, info->set_inval); + if (ret < 0) { + pr_err("error calling set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_inval=%d): ret=%pe\n", + start_pfn, end_pfn, info->set_inval, ERR_PTR(ret)); + } else { + pr_debug("external called: set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_inval=%d, ret=%pe)\n", + start_pfn, end_pfn, info->set_inval, ERR_PTR(ret)); + } + + info->ret = ret; + return 0; +} + +int kernel_pgtable_set_export_invalid(struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, bool set_inval) +{ + struct kernel_pgtable_info info = { 0 }; + + if (offset % OBMM_MEMSEG_SIZE != 0 || length % OBMM_MEMSEG_SIZE != 0) { + pr_err("%s: invalid param: offset=%#lx length=%#lx", __func__, offset, length); + return -EINVAL; + } + + info.set_inval = set_inval; + 
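+	/*
+	 * Walk every contiguous run and toggle its linear-map validity. The
+	 * walk does not stop on error (the callback always returns 0), so
+	 * info.ret holds the result of the last set_linear_mapping_invalid()
+	 * call.
+	 */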
walk_export_memory(e_reg, offset, length, kernel_pgtable_invalid_call, &info); + pr_debug("%s: [flush_offset=0x%lx, flush_len=0x%lx, set_inval=%d, ret=%pe]\n", + __func__, offset, length, set_inval, ERR_PTR(info.ret)); + return info.ret; +} + +struct map_range_info { + struct obmm_export_region *e_reg; + struct vm_area_struct *vma; + unsigned long orig_pgoff; + enum obmm_mmap_granu mmap_granu; + int ret; +}; + +static int map_range_call(phys_addr_t start, phys_addr_t end, unsigned long offset, void *arg) +{ + struct map_range_info *info = (struct map_range_info *)arg; + unsigned long length = end - start + 1; + unsigned long vm_offset; + + /* + * The offset is offset_from_head. + * + * In the case of private mapping, after remap_pfn_range is called, + * vma->vm_pgoff will be set to pfn, + * but we still need the original offset relative to the start of the region. + */ + vm_offset = offset - (info->orig_pgoff << PAGE_SHIFT); + if (info->mmap_granu == OBMM_MMAP_GRANU_PAGE) { + info->ret = remap_pfn_range(info->vma, info->vma->vm_start + vm_offset, + start >> PAGE_SHIFT, length, info->vma->vm_page_prot); + } else if (info->mmap_granu == OBMM_MMAP_GRANU_PMD) { + info->ret = remap_pfn_range_try_pmd(info->vma, info->vma->vm_start + vm_offset, + start >> PAGE_SHIFT, length, + info->vma->vm_page_prot); + } else { + pr_err("invalid mmap granu: %d\n", info->mmap_granu); + info->ret = -EINVAL; + } + if (info->ret) { + pr_err("map_export_region: failed to call remap_pfn_range on region %d: offset=%#lx, length=%#lx, ret=%pe]\n", + info->e_reg->region.regionid, offset, length, ERR_PTR(info->ret)); + return -1; + } + + return 0; +} + +int map_export_region(struct vm_area_struct *vma, struct obmm_export_region *e_reg, + enum obmm_mmap_granu mmap_granu) +{ + struct map_range_info info; + unsigned long size, offset; + + info.e_reg = e_reg; + info.vma = vma; + info.ret = 0; + info.orig_pgoff = vma->vm_pgoff; + info.mmap_granu = mmap_granu; + + size = vma->vm_end - vma->vm_start; + offset = vma->vm_pgoff << PAGE_SHIFT; + + walk_export_memory(e_reg, offset, size, map_range_call, (void *)(&info)); + + return info.ret; +} + +struct pa_info { + unsigned long pa; + unsigned long offset; + bool found; +}; + +static int search_offset_from_pa(phys_addr_t start, phys_addr_t end, unsigned long offset, + void *arg) +{ + struct pa_info *info = (struct pa_info *)arg; + + if (info->pa >= start && info->pa <= end) { + info->offset = info->pa - start + offset; + info->found = true; + /* end iterator */ + return -1; + } + + return 0; +} + +/* terminate iteration in all cases */ +static int search_pa_from_offset(phys_addr_t start, phys_addr_t end __always_unused, + unsigned long offset, void *arg) +{ + struct pa_info *info = (struct pa_info *)arg; + + if (offset != info->offset) { + pr_warn("iterator bug encountered in %s, iter.offset=%#lx, expect %#lx.\n", + __func__, offset, info->offset); + info->found = false; + return -1; + } + + info->pa = start; + info->found = true; + return -1; +} + +int get_pa_detail_export_region(const struct obmm_export_region *e_reg, unsigned long pa, + struct obmm_ext_addr *ext_addr) +{ + struct pa_info info = { 0 }; + + info.pa = pa; + walk_export_memory(e_reg, 0, e_reg->region.mem_size, search_offset_from_pa, &info); + + /* not found */ + if (!info.found) + return -EFAULT; + + /* found */ + ext_addr->region_type = OBMM_EXPORT_REGION; + ext_addr->regionid = e_reg->region.regionid; + ext_addr->offset = info.offset; + ext_addr->tid = e_reg->tokenid; + ext_addr->uba = e_reg->uba + info.offset; + 
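+	/* NUMA id resolution is still to be decided; report NUMA_NO_NODE for now */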
ext_addr->numa_id = NUMA_NO_NODE; + ext_addr->pa = pa; + + return 0; +} + +int get_offset_detail_export_region(const struct obmm_export_region *e_reg, unsigned long offset, + struct obmm_ext_addr *ext_addr) +{ + struct pa_info info = { 0 }; + + if (offset >= e_reg->region.mem_size) { + pr_err("%s: invalid offset 0x%lx\n", __func__, offset); + return -EINVAL; + } + + info.offset = offset; + walk_export_memory(e_reg, offset, 1, search_pa_from_offset, &info); + if (!info.found) { + /* Offset has been checked at the beginning of this function. If the code reaches + * here, it must be an implementation error. + */ + pr_err("%s: internal bug encountered\n", __func__); + return -ENODATA; + } + + ext_addr->region_type = e_reg->region.type; + ext_addr->regionid = e_reg->region.regionid; + ext_addr->offset = offset; + ext_addr->tid = e_reg->tokenid; + ext_addr->uba = e_reg->uba + offset; + ext_addr->pa = info.pa; + /* to be decided */ + ext_addr->numa_id = NUMA_NO_NODE; + + return 0; +} diff --git a/drivers/ub/obmm/obmm_export_region_ops.h b/drivers/ub/obmm/obmm_export_region_ops.h new file mode 100644 index 000000000000..ef0b90326463 --- /dev/null +++ b/drivers/ub/obmm/obmm_export_region_ops.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ +#ifndef OBMM_EXPORT_REGION_H +#define OBMM_EXPORT_REGION_H + +#include "obmm_core.h" + +int flush_export_region(struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, unsigned long cache_ops); +int kernel_pgtable_set_export_invalid(struct obmm_export_region *e_reg, unsigned long offset, + unsigned long length, bool set_nc); +int map_export_region(struct vm_area_struct *vma, struct obmm_export_region *e_reg, + enum obmm_mmap_granu mmap_granu); + +int get_pa_detail_export_region(const struct obmm_export_region *e_reg, unsigned long pa, + struct obmm_ext_addr *ext_addr); + +int get_offset_detail_export_region(const struct obmm_export_region *e_reg, unsigned long offset, + struct obmm_ext_addr *ext_addr); + +#endif diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h index 65e2a35f1bb0..256f0caa583a 100644 --- a/include/uapi/ub/obmm.h +++ b/include/uapi/ub/obmm.h @@ -18,6 +18,39 @@ extern "C" { #define OBMM_MAX_PRIV_LEN 512 #define OBMM_MAX_VENDOR_LEN 128 + +#define OBMM_EXPORT_FLAG_ALLOW_MMAP 0x1UL +#define OBMM_EXPORT_FLAG_FAST 0x2UL +#define OBMM_EXPORT_FLAG_MASK (OBMM_EXPORT_FLAG_ALLOW_MMAP | OBMM_EXPORT_FLAG_FAST) + +/* For ordinary register requests, @length and @flags are input arguments while + * @tokenid, @uba and @mem_id are values set by obmm kernel module. For + * register request, @length, @flags, @tokenid and @uba are input to obmm + * kernel module. @mem_id is the only output. 
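+ *
+ * Minimal usage sketch (illustrative only; the file descriptor, the request
+ * size and the error handling are assumptions, not part of this header):
+ *
+ *	struct obmm_cmd_export cmd = { 0 };
+ *
+ *	cmd.size[0] = request_bytes;	// per-NUMA-node sizes, node 0 here
+ *	cmd.length = 1;			// number of valid entries in size[]
+ *	cmd.flags = OBMM_EXPORT_FLAG_ALLOW_MMAP;
+ *	cmd.pxm_numa = 0;		// affinity hint, same package as memory
+ *	if (ioctl(obmm_fd, OBMM_CMD_EXPORT, &cmd) == 0)
+ *		// cmd.tokenid, cmd.uba and cmd.mem_id are now filled in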
+ */ +struct obmm_cmd_export { + __u64 size[OBMM_MAX_LOCAL_NUMA_NODES]; + __u64 length; + __u64 flags; + __u64 uba; + __u64 mem_id; + __u32 tokenid; + __s32 pxm_numa; + __u16 priv_len; + __u16 vendor_len; + __u8 deid[16]; + __u8 seid[16]; + const void *vendor_info; + const void *priv; +} __attribute__((aligned(8))); + +#define OBMM_UNEXPORT_FLAG_MASK (0UL) + +struct obmm_cmd_unexport { + __u64 mem_id; + __u64 flags; +} __attribute__((aligned(8))); + enum obmm_query_key_type { OBMM_QUERY_BY_PA, OBMM_QUERY_BY_ID_OFFSET @@ -32,6 +65,8 @@ struct obmm_cmd_addr_query { } __attribute__((aligned(8))); +#define OBMM_CMD_EXPORT _IOWR('x', 0, struct obmm_cmd_export) +#define OBMM_CMD_UNEXPORT _IOW('x', 2, struct obmm_cmd_unexport) #define OBMM_CMD_ADDR_QUERY _IOWR('x', 4, struct obmm_cmd_addr_query) /* cache maintenance operations (not states) */ -- Gitee From 2f227ab915c76fd931b67f1719b95b585665bf73 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:40 +0800 Subject: [PATCH 14/48] obmm: Add user address export support commit 2a3becabc966920fdea924ef7b3872dbd346ff93 openEuler Extend export functionality to support user-specified addresses for memory regions, providing more flexibility for memory sharing use cases. The memory exported should guarantee: - Only hugetlbfs pages are allowed to be exported - All pages must be on the same socket/package - Exported memory cannot be mmap'ed locally (OBMM_EXPORT_FLAG_ALLOW_MMAP flag is explicitly rejected) Key features include: 1. Added a new IOCTL command OBMM_CMD_EXPORT_PID with obmm_cmd_export_pid structure to support exporting memory from specified process address space 2. Core implementation: - New file obmm_export_from_user.c with implementation for: * alloc_export_memory_pid() - Extract pages from user process address space * free_export_memory_pid() - Clean up exported memory resources * obmm_export_pid() - Main function to handle export request - Added mem_description_pid structure to store PID and user virtual address - Added OBMM_REGION_FLAG_MEMORY_FROM_USER flag to track region type 3. Security and validation checks: - Verify process existence and access permissions - Pin pages to prevent page faults during export - Modify page table protections to make exported memory inaccessible - Check NUMA node alignment to ensure all pages are on same package This allows applications to export memory at specific addresses required by their use cases or compatibility requirements. 
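For illustration, a typical caller fills struct obmm_cmd_export_pid with
the target pid (0 selects the calling process), the hugetlbfs-backed
virtual address and the length, then issues
ioctl(fd, OBMM_CMD_EXPORT_PID, &cmd). On success the kernel fills in
tokenid, uba and mem_id, and the region can later be reclaimed with
OBMM_CMD_UNEXPORT.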
Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 2 +- drivers/ub/obmm/obmm_core.c | 32 ++- drivers/ub/obmm/obmm_core.h | 11 + drivers/ub/obmm/obmm_export.c | 10 +- drivers/ub/obmm/obmm_export.h | 3 + drivers/ub/obmm/obmm_export_from_user.c | 364 ++++++++++++++++++++++++ include/uapi/ub/obmm.h | 18 ++ 7 files changed, 430 insertions(+), 10 deletions(-) create mode 100644 drivers/ub/obmm/obmm_export_from_user.c diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 01689289ccec..0c0a1efabb63 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -3,7 +3,7 @@ obmm-y := obmm_core.o \ obmm_cache.o \ - conti_mem_allocator.o \ + obmm_export_from_user.o conti_mem_allocator.o \ obmm_export.o obmm_export_from_pool.o \ ubmempool_allocator.o obmm_export_region_ops.o diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index 74fa10539a70..fc1fd2a96b62 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -151,8 +151,7 @@ int obmm_query_by_offset(struct obmm_region *reg, unsigned long offset, if (reg->type == OBMM_EXPORT_REGION) { e_reg = container_of(reg, struct obmm_export_region, region); - ret = get_offset_detail_export_region(e_reg, - offset, ext_addr); + ret = get_offset_detail_export_region(e_reg, offset, ext_addr); } return ret; @@ -172,8 +171,7 @@ int obmm_query_by_pa(unsigned long pa, struct obmm_ext_addr *ext_addr) if (region->type == OBMM_EXPORT_REGION) { struct obmm_export_region *e_reg; - e_reg = container_of(region, struct obmm_export_region, - region); + e_reg = container_of(region, struct obmm_export_region, region); ret = get_pa_detail_export_region(e_reg, pa, ext_addr); } @@ -394,12 +392,13 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, struct obmm_cmd_export create; struct obmm_cmd_unexport unexport; struct obmm_cmd_addr_query query; + struct obmm_cmd_export_pid export_pid; } cmd_param; switch (cmd) { case OBMM_CMD_EXPORT: { ret = (int)copy_from_user(&cmd_param.create, (void __user *)arg, - sizeof(struct obmm_cmd_export)); + sizeof(struct obmm_cmd_export)); if (ret) { pr_err("failed to load export argument"); return -EFAULT; @@ -410,7 +409,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, return ret; ret = (int)copy_to_user((void __user *)arg, &cmd_param.create, - sizeof(struct obmm_cmd_export)); + sizeof(struct obmm_cmd_export)); if (ret) { pr_err("failed to write export result"); return -EFAULT; @@ -418,7 +417,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, } break; case OBMM_CMD_UNEXPORT: { ret = (int)copy_from_user(&cmd_param.unexport, (void __user *)arg, - sizeof(struct obmm_cmd_unexport)); + sizeof(struct obmm_cmd_unexport)); if (ret) { pr_err("failed to load unexport argument"); return -EFAULT; @@ -445,6 +444,25 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, return -EFAULT; } } break; + case OBMM_CMD_EXPORT_PID: { + ret = (int)copy_from_user(&cmd_param.export_pid, (void __user *)arg, + sizeof(struct obmm_cmd_export_pid)); + if (ret) { + pr_err("Failed to load export_pid param.\n"); + return -EFAULT; + } + + ret = obmm_export_pid(&cmd_param.export_pid); + if (ret) + return ret; + + ret = (int)copy_to_user((void __user *)arg, &cmd_param.export_pid, + sizeof(struct obmm_cmd_export_pid)); + if (ret) { + pr_err("failed to write export_pid result.\n"); + return -EFAULT; + } + } break; default: ret = -ENOTTY; } 
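A minimal sketch of a user-space caller for the new command follows
(illustrative only: the "/dev/obmm" device path, the helper name and the
lack of error handling are assumptions, not part of this series):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <ub/obmm.h>

/* Export @len bytes of hugetlbfs-backed memory at @va from the caller. */
static int export_hugepages_sketch(void *va, unsigned long long len)
{
	struct obmm_cmd_export_pid cmd;
	int fd = open("/dev/obmm", O_RDWR);	/* device path is an assumption */

	if (fd < 0)
		return -1;
	memset(&cmd, 0, sizeof(cmd));
	cmd.pid = 0;		/* 0 selects the calling process */
	cmd.va = va;		/* must be hugetlbfs-backed */
	cmd.length = len;	/* multiple of OBMM_BASIC_GRANU */
	cmd.pxm_numa = 0;	/* affinity hint, same package as the pages */
	if (ioctl(fd, OBMM_CMD_EXPORT_PID, &cmd) == 0)
		printf("mem_id=%llu uba=%#llx tokenid=%u\n",
		       (unsigned long long)cmd.mem_id,
		       (unsigned long long)cmd.uba, cmd.tokenid);
	close(fd);
	return 0;
}
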
diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index f00797a3d99f..a60dd9f4bc41 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -100,11 +100,21 @@ static inline bool region_allow_mmap(const struct obmm_region *reg) { return reg->flags & OBMM_REGION_FLAG_ALLOW_MMAP; } +static inline bool region_memory_from_user(const struct obmm_region *reg) +{ + return reg->flags & OBMM_REGION_FLAG_MEMORY_FROM_USER; +} static inline bool region_fast_alloc(const struct obmm_region *reg) { return reg->flags & OBMM_REGION_FLAG_FAST_ALLOC; } +struct mem_description_pid { + int pid; + void __user *user_va; + int pinned; + u64 start_time; +}; struct mem_description_pool { struct list_head head[OBMM_MAX_LOCAL_NUMA_NODES]; }; @@ -118,6 +128,7 @@ struct obmm_export_region { /* physical pages */ union { + struct mem_description_pid mem_desc_pid; struct mem_description_pool mem_desc; }; diff --git a/drivers/ub/obmm/obmm_export.c b/drivers/ub/obmm/obmm_export.c index 009020d2e2dd..50f21dde5f62 100644 --- a/drivers/ub/obmm/obmm_export.c +++ b/drivers/ub/obmm/obmm_export.c @@ -137,7 +137,10 @@ static int teardown_ummu(struct obmm_export_region *e_reg) */ static int alloc_export_memory(struct obmm_export_region *e_reg) { - return alloc_export_memory_pool(e_reg); + if (region_memory_from_user(&e_reg->region)) + return alloc_export_memory_pid(e_reg); + else + return alloc_export_memory_pool(e_reg); } static void free_export_memory_pool(struct obmm_export_region *e_reg) @@ -148,7 +151,10 @@ static void free_export_memory_pool(struct obmm_export_region *e_reg) static void free_export_memory(struct obmm_export_region *e_reg) { - free_export_memory_pool(e_reg); + if (region_memory_from_user(&e_reg->region)) + free_export_memory_pid(e_reg); + else + free_export_memory_pool(e_reg); } /* Ensure all user inputs are properly converted and filled into the region. */ diff --git a/drivers/ub/obmm/obmm_export.h b/drivers/ub/obmm/obmm_export.h index 1ced6d14569a..68b6ab2b0e86 100644 --- a/drivers/ub/obmm/obmm_export.h +++ b/drivers/ub/obmm/obmm_export.h @@ -10,9 +10,12 @@ int obmm_export_common(struct obmm_export_region *e_reg); int export_flags_to_region_flags(unsigned long *region_flags, unsigned long user_flags); +int alloc_export_memory_pid(struct obmm_export_region *e_reg); +void free_export_memory_pid(struct obmm_export_region *e_reg); int alloc_export_memory_pool(struct obmm_export_region *e_reg); int obmm_unexport_common(struct obmm_export_region *e_reg); int obmm_export_from_pool(struct obmm_cmd_export *cmd_export); +int obmm_export_pid(struct obmm_cmd_export_pid *export_pid); int obmm_unexport(const struct obmm_cmd_unexport *cmd_unexport); int set_export_vendor(struct obmm_export_region *e_reg, const void __user *vendor_info, diff --git a/drivers/ub/obmm/obmm_export_from_user.c b/drivers/ub/obmm/obmm_export_from_user.c new file mode 100644 index 000000000000..e1cd35416e63 --- /dev/null +++ b/drivers/ub/obmm/obmm_export_from_user.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description:OBMM Framework's implementations. 
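+ *
+ * Flow of alloc_export_memory_pid() (informational summary):
+ *	resolve task and mm -> account mm->pinned_vm -> pin the pages
+ *	(pin_user_pages_fast() for the current mm, pin_user_pages_remote()
+ *	otherwise) -> check that all pages are hugetlb and on one package ->
+ *	build the sg table -> invalidate the kernel linear mapping -> revoke
+ *	user page-table access -> write back and invalidate caches
+ *	(OBMM_SHM_CACHE_WB_INVAL)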
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "obmm_core.h" +#include "obmm_cache.h" +#include "obmm_export_region_ops.h" +#include "obmm_export.h" + +static struct task_struct *get_tsk_struct(pid_t pid) +{ + struct task_struct *task; + + if (!pid) { + get_task_struct(current); + return current; + } + + rcu_read_lock(); + task = pid_task(find_pid_ns(pid, &init_pid_ns), PIDTYPE_PID); + if (task) + get_task_struct(task); + rcu_read_unlock(); + + return task; +} + +void free_export_memory_pid(struct obmm_export_region *e_reg) +{ + struct mem_description_pid *desc = &e_reg->mem_desc_pid; + struct mm_struct *mm = NULL; + struct task_struct *tsk; + struct scatterlist *sg; + unsigned int i; + + WARN_ON(desc->pid == 0); + tsk = get_tsk_struct(desc->pid); + if (tsk && tsk->start_time != desc->start_time) { + /* if the process is still alive or its PID has not been reused */ + pr_err("pid(%d) is reused.\n", desc->pid); + put_task_struct(tsk); + tsk = NULL; + } + + if (tsk) + mm = get_task_mm(tsk); + + if (mm) { + atomic64_sub(desc->pinned, &mm->pinned_vm); + WARN_ON(modify_pgtable_prot(mm, desc->user_va, e_reg->region.mem_size, true)); + mmput(mm); + } + + if (tsk) + put_task_struct(tsk); + + WARN_ON(kernel_pgtable_set_export_invalid(e_reg, 0, e_reg->region.mem_size, false)); + + /* unpin all pages from sgt */ + for_each_sgtable_sg(&e_reg->sgt, sg, i) + unpin_user_page_range_dirty_lock(sg_page(sg), DIV_ROUND_UP(sg->length, PAGE_SIZE), + true); + + sg_free_table(&e_reg->sgt); +} + +static bool hisi_workarounds_check_page_list(struct obmm_export_region *reg, struct page **pages, + int count) +{ + nodemask_t node_mask; + unsigned int node; + int i, nid; + + nodes_clear(node_mask); + for (i = 0; i < count; i++) { + struct page *p = pages[i]; + + if (!PageHuge(p)) { + pr_err("Only hugetlbfs pages are allowed\n"); + return false; + } + +#ifdef CONFIG_NUMA + nid = page_to_nid(p); +#else + nid = 0; +#endif + if (nid < 0 || nid >= OBMM_MAX_LOCAL_NUMA_NODES) { + pr_err("Invalid node ID %d for page %p\n", nid, p); + return false; + } + + node_set(nid, node_mask); + reg->node_mem_size[nid] += PAGE_SIZE; + } + + for_each_node_mask(node, node_mask) { + pr_debug("Page resides in node %u\n", node); + reg->node_count = node + 1; + } + if (reg->affinity > OBMM_MAX_LOCAL_NUMA_NODES) { + pr_err("Invalid pxm_numa %d\n", reg->affinity); + return false; + } + node_set(reg->affinity, node_mask); + + return nodes_on_same_package(&node_mask); +} + +int alloc_export_memory_pid(struct obmm_export_region *e_reg) +{ + unsigned long new_pinned, nrpages; + struct mem_description_pid *desc = &e_reg->mem_desc_pid; + struct page **page_list; + struct task_struct *tsk; + struct mm_struct *mm; + bool remote_mm; + int pinned, ret = 0; + int locked = 0; + + nrpages = e_reg->region.mem_size >> PAGE_SHIFT; + if (!nrpages) { + pr_err("export pages must > 1\n"); + return -EINVAL; + } + + tsk = get_tsk_struct(desc->pid); + if (!tsk) { + pr_err("get tsk from pid(%d) failed.\n", desc->pid); + return -ESRCH; + } + + mm = get_task_mm(tsk); + if (!mm) { + ret = -ESRCH; + pr_err("get mm from pid(%d) failed.\n", desc->pid); + goto drop_tsk; + } + desc->start_time = tsk->start_time; + + remote_mm = mm != current->mm; + if (!remote_mm) + desc->pid = current->tgid; + + pr_debug("exporting from %s\n", remote_mm ? 
"remote" : "current"); + + new_pinned = (unsigned long)atomic64_add_return(nrpages, &mm->pinned_vm); + + page_list = kvmalloc_array(nrpages, sizeof(struct page *), GFP_KERNEL); + if (!page_list) { + ret = -ENOMEM; + goto drop_pinned; + } + + pr_debug("exporting useraddr: pid(%d) va(%p) size(%#llx)\n", desc->pid, desc->user_va, + e_reg->region.mem_size); + + mmap_read_lock(mm); + locked = 1; + if (remote_mm) { + pinned = pin_user_pages_remote(mm, (uintptr_t)desc->user_va, nrpages, FOLL_WRITE, + page_list, &locked); + } else { + pinned = pin_user_pages_fast((uintptr_t)desc->user_va, nrpages, FOLL_WRITE, + page_list); + } + if (locked) + mmap_read_unlock(mm); + + if (pinned < 0) { + pr_err("pin memory failed, %d.\n", pinned); + ret = pinned; + goto free_page_list; + } + + if (pinned != (int)nrpages) { + pr_err("failed to pin user pages(%d/%lu)!\n", pinned, nrpages); + ret = -ENOMEM; + goto free_page_list; + } + + if (!hisi_workarounds_check_page_list(e_reg, page_list, nrpages)) { + pr_err("hisi workarounds check no passing.\n"); + ret = -EOPNOTSUPP; + goto free_page_list; + } + + ret = sg_alloc_table_from_pages_segment(&e_reg->sgt, page_list, nrpages, 0, + e_reg->region.mem_size, SZ_1G, GFP_KERNEL); + if (ret) { + pr_err("alloc sg table failed, %pe.\n", ERR_PTR(ret)); + goto free_page_list; + } + + ret = kernel_pgtable_set_export_invalid(e_reg, 0, e_reg->region.mem_size, true); + if (ret) + goto out_free_sg; + + ret = modify_pgtable_prot(mm, desc->user_va, e_reg->region.mem_size, false); + if (ret) + goto out_set_kernel_cacheable; + + ret = obmm_region_flush_range(&e_reg->region, 0, e_reg->region.mem_size, + OBMM_SHM_CACHE_WB_INVAL); + if (ret) + goto out_reset_pgtable_prot; + + desc->pinned = pinned; + kvfree(page_list); /* all pages saved in scatterlist */ + mmput(mm); + put_task_struct(tsk); + pr_debug("exporting memory prepared.\n"); + + return 0; + +out_reset_pgtable_prot: + WARN_ON(modify_pgtable_prot(mm, desc->user_va, e_reg->region.mem_size, true)); +out_set_kernel_cacheable: + WARN_ON(kernel_pgtable_set_export_invalid(e_reg, 0, e_reg->region.mem_size, false)); +out_free_sg: + sg_free_table(&e_reg->sgt); +free_page_list: + if (pinned > 0) + unpin_user_pages_dirty_lock(page_list, pinned, 0); + kvfree(page_list); +drop_pinned: + atomic64_sub(nrpages, &mm->pinned_vm); + mmput(mm); +drop_tsk: + put_task_struct(tsk); + return ret; +} + +static int obmm_cmd_export_pid_allowed(struct obmm_cmd_export_pid *cmd) +{ + if (cmd->flags & ~(OBMM_EXPORT_FLAG_MASK)) { + pr_err("invalid flags %#llx encountered in export_user_addr.\n", cmd->flags); + return -EINVAL; + } + if (cmd->flags & OBMM_EXPORT_FLAG_ALLOW_MMAP) { + pr_err("ALLOW_MMAP flag is not allowed in export_user_addr.\n"); + return -EINVAL; + } + + if (cmd->length == 0) { + pr_err("export sizeof 0 memory is not allowed.\n"); + return -EINVAL; + } + + if (cmd->length % OBMM_BASIC_GRANU) { + pr_err("export memory size is not aligned to OBMM basic granularity.\n"); + return -EINVAL; + } + + return 0; +} + +static struct obmm_export_region * +alloc_export_region_from_obmm_cmd_export_pid(const struct obmm_cmd_export_pid *export_pid) +{ + int ret; + + struct obmm_export_region *e_reg = kzalloc(sizeof(struct obmm_export_region), GFP_KERNEL); + + if (e_reg == NULL) + return ERR_PTR(-ENOMEM); + + e_reg->mem_desc_pid.pid = export_pid->pid; + e_reg->mem_desc_pid.user_va = export_pid->va; + e_reg->region.mem_size = export_pid->length; + e_reg->region.type = OBMM_EXPORT_REGION; + e_reg->region.mem_cap = 0; + e_reg->affinity = export_pid->pxm_numa; + 
memcpy(e_reg->deid, export_pid->deid, sizeof(e_reg->deid)); + ret = export_flags_to_region_flags(&e_reg->region.flags, export_pid->flags); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + e_reg->region.flags |= OBMM_REGION_FLAG_MEMORY_FROM_USER; + ret = set_obmm_region_priv(&e_reg->region, export_pid->priv_len, export_pid->priv); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + ret = set_export_vendor(e_reg, export_pid->vendor_info, export_pid->vendor_len); + if (ret) { + kfree(e_reg); + return ERR_PTR(ret); + } + return e_reg; +} + +static void print_export_pid_param(const struct obmm_cmd_export_pid *cmd_export_pid) +{ + pr_info("obmm_export_useraddr: pid=%d length=%#llx priv_len=%u deid=" + EID_FMT64 " vendor_len=%u\n", + cmd_export_pid->pid, cmd_export_pid->length, cmd_export_pid->priv_len, + EID_ARGS64_H(cmd_export_pid->deid), EID_ARGS64_L(cmd_export_pid->deid), + cmd_export_pid->vendor_len); +} + +int obmm_export_pid(struct obmm_cmd_export_pid *export_pid) +{ + struct obmm_export_region *e_reg; + uint64_t uba, mem_id; + uint32_t token_id; + int ret; + + print_export_pid_param(export_pid); + ret = obmm_cmd_export_pid_allowed(export_pid); + if (ret) + return ret; + + e_reg = alloc_export_region_from_obmm_cmd_export_pid(export_pid); + if (IS_ERR(e_reg)) + return PTR_ERR(e_reg); + + ret = init_obmm_region(&e_reg->region); + if (ret) + goto out_free_reg; + + ret = obmm_export_common(e_reg); + if (ret) + goto out_unit_reg; + + token_id = e_reg->tokenid; + uba = e_reg->uba; + mem_id = (uint64_t)e_reg->region.regionid; + + ret = register_obmm_region(&e_reg->region); + if (ret) + goto out_unexport; + activate_obmm_region(&e_reg->region); + + export_pid->tokenid = token_id; + export_pid->uba = uba; + export_pid->mem_id = mem_id; + + pr_info("obmm_export_useraddr: mem_id=%llu online.\n", mem_id); + return 0; + +out_unexport: + obmm_unexport_common(e_reg); +out_unit_reg: + uninit_obmm_region(&e_reg->region); +out_free_reg: + free_export_region(e_reg); + return ret; +} diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h index 256f0caa583a..72c7dd5d6795 100644 --- a/include/uapi/ub/obmm.h +++ b/include/uapi/ub/obmm.h @@ -23,6 +23,23 @@ extern "C" { #define OBMM_EXPORT_FLAG_FAST 0x2UL #define OBMM_EXPORT_FLAG_MASK (OBMM_EXPORT_FLAG_ALLOW_MMAP | OBMM_EXPORT_FLAG_FAST) +struct obmm_cmd_export_pid { + void *va; + __u64 length; + __u64 flags; + __u64 uba; + __u64 mem_id; + __u32 tokenid; + __s32 pid; + __s32 pxm_numa; + __u16 priv_len; + __u16 vendor_len; + __u8 deid[16]; + __u8 seid[16]; + const void *priv; + const void *vendor_info; +} __attribute__((aligned(8))); + /* For ordinary register requests, @length and @flags are input arguments while * @tokenid, @uba and @mem_id are values set by obmm kernel module. 
For * register request, @length, @flags, @tokenid and @uba are input to obmm @@ -68,6 +85,7 @@ struct obmm_cmd_addr_query { #define OBMM_CMD_EXPORT _IOWR('x', 0, struct obmm_cmd_export) #define OBMM_CMD_UNEXPORT _IOW('x', 2, struct obmm_cmd_unexport) #define OBMM_CMD_ADDR_QUERY _IOWR('x', 4, struct obmm_cmd_addr_query) +#define OBMM_CMD_EXPORT_PID _IOWR('x', 5, struct obmm_cmd_export_pid) /* cache maintenance operations (not states) */ /* no cache maintenance (nops) */ -- Gitee From 6e885355bb3adcb5fc8242bf95213ca481a71837 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:41 +0800 Subject: [PATCH 15/48] obmm: Add address validation and checking commit 67ffb2edb43cde96368000620a63d0f6be11820f openEuler Implement comprehensive address validation to ensure security and stability of memory import operation. Validations include: - Address range and boundary checking - Alignment verification for different architectures - Permission and access right validation - Memory region overlap detection - User/kernel space address separation These checks prevent invalid memory accesses and ensure robust operation of the OBMM framework. Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 3 +- drivers/ub/obmm/obmm_addr_check.c | 139 ++++++++++++++++++++++++++++++ drivers/ub/obmm/obmm_addr_check.h | 35 ++++++++ drivers/ub/obmm/obmm_core.c | 4 + 4 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 drivers/ub/obmm/obmm_addr_check.c create mode 100644 drivers/ub/obmm/obmm_addr_check.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 0c0a1efabb63..40688aec5a81 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -5,6 +5,7 @@ obmm-y := obmm_core.o \ obmm_cache.o \ obmm_export_from_user.o conti_mem_allocator.o \ obmm_export.o obmm_export_from_pool.o \ - ubmempool_allocator.o obmm_export_region_ops.o + ubmempool_allocator.o obmm_export_region_ops.o \ + obmm_addr_check.o obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/obmm_addr_check.c b/drivers/ub/obmm/obmm_addr_check.c new file mode 100644 index 000000000000..d4c263e2f933 --- /dev/null +++ b/drivers/ub/obmm/obmm_addr_check.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ + +#define pr_fmt(fmt) "OBMM: addr_check:" fmt + +#include +#include +#include +#include +#include + +#include "obmm_addr_check.h" + +struct pa_checker { + spinlock_t lock; + struct maple_tree pa_ranges; +}; +static struct pa_checker g_pa_checker; + +static bool is_same_pa_range(const struct obmm_pa_range *l, const struct obmm_pa_range *r) +{ + bool same = l->start == r->start && l->end == r->end; + + if (!same) + pr_err("unmatched pa range: [%pa, %pa] vs. 
[%pa, %pa]\n", &l->start, &l->end, + &r->start, &r->end); + return same; +} + +int occupy_pa_range(const struct obmm_pa_range *pa_range) +{ + int ret; + void *persist_info; + unsigned long flags; + + persist_info = kmemdup(pa_range, sizeof(*pa_range), GFP_KERNEL); + if (persist_info == NULL) + return -ENOMEM; + + spin_lock_irqsave(&g_pa_checker.lock, flags); + ret = mtree_insert_range(&g_pa_checker.pa_ranges, (unsigned long)pa_range->start, + (unsigned long)pa_range->end, persist_info, GFP_ATOMIC); + spin_unlock_irqrestore(&g_pa_checker.lock, flags); + + if (ret != 0) { + kfree(persist_info); + pr_err("failed to occupy PA range [%pa, %pa]: ret=%pe\n", &pa_range->start, + &pa_range->end, ERR_PTR(ret)); + return ret; + } + pr_debug("pa_check: add [%pa,%pa]->{user=%s,data=%p}\n", &pa_range->start, &pa_range->end, + "direct_import", pa_range->info.data); + return 0; +} + +int free_pa_range(const struct obmm_pa_range *pa_range) +{ + int ret; + const char *user; + void *entry; + unsigned long flags; + + spin_lock_irqsave(&g_pa_checker.lock, flags); + entry = mtree_erase(&g_pa_checker.pa_ranges, (unsigned long)pa_range->start); + spin_unlock_irqrestore(&g_pa_checker.lock, flags); + if (!entry) { + pr_err("PA range [%pa, %pa], not found.\n", &pa_range->start, &pa_range->end); + return -EFAULT; + } + ret = 0; + if (!is_same_pa_range((const struct obmm_pa_range *)entry, pa_range)) { + /* expected to be UNREACHABLE */ + pr_err("BUG: PA range does not fully match.\n"); + ret = -ENOTRECOVERABLE; + } + user = "import"; + pr_debug("pa_check: del [%pa,?]->{user=%s,data=%p}\n", &pa_range->start, user, + ((struct obmm_pa_range *)entry)->info.data); + kfree(entry); + return ret; +} + +int query_pa_range(phys_addr_t addr, struct obmm_addr_info *info) +{ + unsigned long index, flags; + const struct obmm_pa_range *retrieved; + + if (info == NULL) + return -EINVAL; + + index = (unsigned long)addr; + spin_lock_irqsave(&g_pa_checker.lock, flags); + retrieved = (const struct obmm_pa_range *)mt_find(&g_pa_checker.pa_ranges, &index, index); + if (retrieved) { + info->user = retrieved->info.user; + info->data = retrieved->info.data; + } + spin_unlock_irqrestore(&g_pa_checker.lock, flags); + + if (!retrieved) + return -EFAULT; + return 0; +} + +int update_pa_range(phys_addr_t addr, const struct obmm_addr_info *info) +{ + unsigned long index, flags; + struct obmm_pa_range *retrieved; + + if (info == NULL) + return -EINVAL; + + index = (unsigned long)addr; + spin_lock_irqsave(&g_pa_checker.lock, flags); + retrieved = (struct obmm_pa_range *)mt_find(&g_pa_checker.pa_ranges, &index, index); + if (retrieved) { + retrieved->info.user = info->user; + retrieved->info.data = info->data; + } + spin_unlock_irqrestore(&g_pa_checker.lock, flags); + + if (!retrieved) + return -EFAULT; + pr_debug("pa_check: update [%pa,?]->{user=%s,data=%p}\n", &addr, "direct_import", + info->data); + return 0; +} + +void module_addr_check_init(void) +{ + mt_init(&g_pa_checker.pa_ranges); + spin_lock_init(&g_pa_checker.lock); +} +void module_addr_check_exit(void) +{ + mtree_destroy(&g_pa_checker.pa_ranges); +} diff --git a/drivers/ub/obmm/obmm_addr_check.h b/drivers/ub/obmm/obmm_addr_check.h new file mode 100644 index 000000000000..9acb4db2773b --- /dev/null +++ b/drivers/ub/obmm/obmm_addr_check.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+ */ +#ifndef OBMM_ADDR_DUP_CHECK_H +#define OBMM_ADDR_DUP_CHECK_H + +#include + +enum obmm_addr_user { + OBMM_ADDR_USER_DIRECT_IMPORT, +}; +struct obmm_addr_info { + enum obmm_addr_user user; + void *data; +}; + +struct obmm_pa_range { + phys_addr_t start; + phys_addr_t end; + struct obmm_addr_info info; +}; + +int occupy_pa_range(const struct obmm_pa_range *pa_range); +int free_pa_range(const struct obmm_pa_range *pa_range); + +/* @addr is the search key and @info stores output value */ +int query_pa_range(phys_addr_t addr, struct obmm_addr_info *info); +/* @addr is the search key and @info stores the overwrite value */ +int update_pa_range(phys_addr_t addr, const struct obmm_addr_info *info); + +void module_addr_check_init(void); +void module_addr_check_exit(void); + +#endif diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index fc1fd2a96b62..645f18602fae 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -23,6 +23,7 @@ #include "obmm_cache.h" #include "obmm_export_region_ops.h" #include "ubmempool_allocator.h" +#include "obmm_addr_check.h" #include "obmm_export.h" #include "obmm_core.h" @@ -500,6 +501,8 @@ static int __init obmm_init(void) spin_lock_init(&g_obmm_ctx_info.lock); INIT_LIST_HEAD(&g_obmm_ctx_info.regions); + module_addr_check_init(); + pr_info("obmm_module: init completed\n"); return ret; @@ -512,6 +515,7 @@ static void __exit obmm_exit(void) { pr_info("obmm_module: exit started\n"); + module_addr_check_exit(); misc_deregister(&obmm_dev_handle); ubmempool_allocator_exit(); -- Gitee From 301c5d4ef5e3165d264de220aaffb36b3786d5e9 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:41 +0800 Subject: [PATCH 16/48] obmm: Add memory region import functionality commit 8a5e2c9e2b164582b11064dddcf633671b412070 openEuler This patch adds user-space interfaces for importing and releasing local physical memory that has been configured by cluster management software and is within the range managed by the UBUS driver. The imported memory can be managed by the remote NUMA memory management module (numa.remote) based on user configuration. Key components added: - User-space interfaces: OBMM_CMD_IMPORT and OBMM_CMD_UNIMPORT ioctl commands - Core data structure: obmm_import_region for representing imported memory - Support functions: extended existing functions for cache flushing, mapping, and offset query operations to work with import regions - Memory management: functions to prepare and release imported memory, including NUMA node registration and address range management This completes the memory sharing framework by allowing hosts to both export and import memory regions. 
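An illustrative user-space call, shown only as a minimal sketch: the misc
device path /dev/obmm is an assumption (the node name is not part of this
patch) and error handling is reduced to return codes. It exercises the new
OBMM_CMD_IMPORT command with the NUMA_REMOTE policy:

    #include <fcntl.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <ub/obmm.h>	/* struct obmm_cmd_import, OBMM_CMD_IMPORT */

    /* import a UBUS-managed PA range as a remote NUMA node; 0 on success */
    static int import_remote_range(unsigned long long pa, unsigned long long len,
                                   unsigned int scna, unsigned long long *mem_id)
    {
        struct obmm_cmd_import cmd;
        int fd, ret;

        fd = open("/dev/obmm", O_RDWR);	/* hypothetical device node */
        if (fd < 0)
            return -1;

        memset(&cmd, 0, sizeof(cmd));
        cmd.flags = OBMM_IMPORT_FLAG_NUMA_REMOTE;	/* exactly one policy flag */
        cmd.addr = pa;		/* OBMM_BASIC_GRANU-aligned physical base */
        cmd.length = len;	/* non-zero, OBMM_BASIC_GRANU-aligned */
        cmd.scna = scna;	/* source bus controller, must validate */
        cmd.numa_id = -1;	/* NUMA_NO_NODE: let the hotplug core pick */
        ret = ioctl(fd, OBMM_CMD_IMPORT, &cmd);
        if (ret == 0)
            *mem_id = cmd.mem_id;	/* set by the kernel, needed for unimport */
        close(fd);
        return ret;
    }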
Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 2 +- drivers/ub/obmm/obmm_cache.c | 11 +- drivers/ub/obmm/obmm_core.c | 43 ++- drivers/ub/obmm/obmm_core.h | 32 ++ drivers/ub/obmm/obmm_import.c | 531 ++++++++++++++++++++++++++++++++++ drivers/ub/obmm/obmm_import.h | 24 ++ include/uapi/ub/obmm.h | 31 ++ 7 files changed, 670 insertions(+), 4 deletions(-) create mode 100644 drivers/ub/obmm/obmm_import.c create mode 100644 drivers/ub/obmm/obmm_import.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 40688aec5a81..472d99ec31b4 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0+ obmm-y := obmm_core.o \ - obmm_cache.o \ + obmm_cache.o obmm_import.o \ obmm_export_from_user.o conti_mem_allocator.o \ obmm_export.o obmm_export_from_pool.o \ ubmempool_allocator.o obmm_export_region_ops.o \ diff --git a/drivers/ub/obmm/obmm_cache.c b/drivers/ub/obmm/obmm_cache.c index 53f0840a749b..534be7788501 100644 --- a/drivers/ub/obmm/obmm_cache.c +++ b/drivers/ub/obmm/obmm_cache.c @@ -14,6 +14,7 @@ #include "obmm_core.h" #include "obmm_export_region_ops.h" +#include "obmm_import.h" #include "obmm_cache.h" static bool skip_cache_maintain; @@ -122,6 +123,7 @@ int obmm_region_flush_range(struct obmm_region *reg, unsigned long offset, unsig uint8_t cache_ops) { int ret; + struct obmm_import_region *i_reg; struct obmm_export_region *e_reg; /* validation */ @@ -141,8 +143,13 @@ int obmm_region_flush_range(struct obmm_region *reg, unsigned long offset, unsig pr_debug("flush cache: region=%d, offset=0x%lx, length=0x%lx, cache_ops=%u\n", reg->regionid, offset, length, cache_ops); /* clear cache and ubus queue */ - e_reg = container_of(reg, struct obmm_export_region, region); - ret = flush_export_region(e_reg, offset, length, cache_ops); + if (reg->type == OBMM_IMPORT_REGION) { + i_reg = container_of(reg, struct obmm_import_region, region); + ret = flush_import_region(i_reg, offset, length, cache_ops); + } else { + e_reg = container_of(reg, struct obmm_export_region, region); + ret = flush_export_region(e_reg, offset, length, cache_ops); + } if (ret) pr_err("flush failed: region=%d, offset=0x%lx, length=0x%lx, cache_ops=%u\n", diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index 645f18602fae..2ebed5b7277b 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -23,6 +23,7 @@ #include "obmm_cache.h" #include "obmm_export_region_ops.h" #include "ubmempool_allocator.h" +#include "obmm_import.h" #include "obmm_addr_check.h" #include "obmm_export.h" #include "obmm_core.h" @@ -149,12 +150,15 @@ int obmm_query_by_offset(struct obmm_region *reg, unsigned long offset, { int ret; struct obmm_export_region *e_reg; + struct obmm_import_region *i_reg; if (reg->type == OBMM_EXPORT_REGION) { e_reg = container_of(reg, struct obmm_export_region, region); ret = get_offset_detail_export_region(e_reg, offset, ext_addr); + } else { + i_reg = container_of(reg, struct obmm_import_region, region); + ret = get_offset_detail_import(i_reg, offset, ext_addr); } - return ret; } @@ -169,6 +173,12 @@ int obmm_query_by_pa(unsigned long pa, struct obmm_ext_addr *ext_addr) spin_lock_irqsave(lock, flags); list_for_each_entry(region, &g_obmm_ctx_info.regions, node) { + if (region->type == OBMM_IMPORT_REGION) { + struct obmm_import_region *i_reg; + + i_reg = container_of(region, struct obmm_import_region, region); + ret = get_pa_detail_import(i_reg, pa, ext_addr); + 
 }
 if (region->type == OBMM_EXPORT_REGION) {
 struct obmm_export_region *e_reg;
@@ -391,7 +401,9 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd,
 	int ret;
 	union {
 		struct obmm_cmd_export create;
+		struct obmm_cmd_import import;
 		struct obmm_cmd_unexport unexport;
+		struct obmm_cmd_unimport unimport;
 		struct obmm_cmd_addr_query query;
 		struct obmm_cmd_export_pid export_pid;
 	} cmd_param;
@@ -416,6 +428,25 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd,
 			return -EFAULT;
 		}
 	} break;
+	case OBMM_CMD_IMPORT: {
+		ret = (int)copy_from_user(&cmd_param.import, (void __user *)arg,
+					  sizeof(struct obmm_cmd_import));
+		if (ret) {
+			pr_err("failed to load import argument");
+			return -EFAULT;
+		}
+
+		ret = obmm_import(&cmd_param.import);
+		if (ret)
+			return ret;
+
+		ret = (int)copy_to_user((void __user *)arg, &cmd_param.import,
+					sizeof(struct obmm_cmd_import));
+		if (ret) {
+			pr_err("failed to write import result");
+			return -EFAULT;
+		}
+	} break;
 	case OBMM_CMD_UNEXPORT: {
 		ret = (int)copy_from_user(&cmd_param.unexport, (void __user *)arg,
 					  sizeof(struct obmm_cmd_unexport));
@@ -426,6 +457,16 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd,
 		ret = obmm_unexport(&cmd_param.unexport);
 	} break;
+	case OBMM_CMD_UNIMPORT: {
+		ret = (int)copy_from_user(&cmd_param.unimport, (void __user *)arg,
+					  sizeof(struct obmm_cmd_unimport));
+		if (ret) {
+			pr_err("failed to load unimport argument");
+			return -EFAULT;
+		}
+
+		ret = obmm_unimport(&cmd_param.unimport);
+	} break;
 	case OBMM_CMD_ADDR_QUERY: {
 		ret = (int)copy_from_user(&cmd_param.query, (void __user *)arg,
 					  sizeof(struct obmm_cmd_addr_query));
diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h
index a60dd9f4bc41..18d3406123bc 100644
--- a/drivers/ub/obmm/obmm_core.h
+++ b/drivers/ub/obmm/obmm_core.h
@@ -42,6 +42,7 @@ extern size_t __obmm_memseg_size;
 enum obmm_region_type {
 	OBMM_EXPORT_REGION,
+	OBMM_IMPORT_REGION
 };
 enum obmm_mmap_granu {
@@ -69,6 +70,7 @@ struct obmm_region {
 	/* unique within host -- can be used as an access handle */
 	int regionid;
+	/* import or export */
 	enum obmm_region_type type;
 	unsigned long flags;
@@ -96,6 +98,10 @@ struct obmm_region {
 	unsigned char priv[OBMM_MAX_PRIV_LEN];
 };
+static inline bool region_numa_remote(const struct obmm_region *reg)
+{
+	return reg->flags & OBMM_REGION_FLAG_NUMA_REMOTE;
+}
 static inline bool region_allow_mmap(const struct obmm_region *reg)
 {
 	return reg->flags & OBMM_REGION_FLAG_ALLOW_MMAP;
@@ -109,6 +115,25 @@ static inline bool region_fast_alloc(const struct obmm_region *reg)
 	return reg->flags & OBMM_REGION_FLAG_FAST_ALLOC;
 }
+struct obmm_import_region {
+	struct obmm_region region;
+
+	u32 dcna;
+	u32 scna;
+
+	u64 pa;
+
+	/* imported NUMA node */
+	int numa_id;
+	/* the base_dist passed in import, which in some scenarios might be an ignored value.
+	 * It is stored here to make error rollback feasible.
+ */ + u8 base_dist; + + u8 deid[16]; + u8 seid[16]; +}; + struct mem_description_pid { int pid; void __user *user_va; @@ -148,6 +173,13 @@ struct obmm_export_region { u8 deid[16]; }; +struct obmm_datapath { + u32 scna; + u32 dcna; + const u8 *seid; + const u8 *deid; +}; + struct obmm_ctx_info { /* active */ struct list_head regions; diff --git a/drivers/ub/obmm/obmm_import.c b/drivers/ub/obmm/obmm_import.c new file mode 100644 index 000000000000..0fd8a969c262 --- /dev/null +++ b/drivers/ub/obmm/obmm_import.c @@ -0,0 +1,531 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ +#include +#include +#include +#include + +#include +#include + +#include "obmm_core.h" +#include "obmm_cache.h" +#include "obmm_import.h" +#include "obmm_addr_check.h" + +static unsigned long get_pa_range_mem_cap(u32 scna, phys_addr_t pa, size_t size) +{ + phys_addr_t pa_start = pa; + phys_addr_t pa_end = pa + size - 1; + unsigned long mem_cap = 0; + + if (ub_memory_validate_pa(scna, pa_start, pa_end, true)) + mem_cap |= OBMM_MEM_ALLOW_CACHEABLE_MMAP; + if (ub_memory_validate_pa(scna, pa_start, pa_end, false)) + mem_cap |= OBMM_MEM_ALLOW_NONCACHEABLE_MMAP; + if (mem_cap == 0) + pr_err("PA range invalid. Non-UBMEM memory cannot be mmaped as import memory: pa=%pa, size=%#zx\n", + &pa_start, size); + + return mem_cap; +} + +static int setup_pa(struct obmm_import_region *i_reg) +{ + i_reg->region.mem_cap = + get_pa_range_mem_cap(i_reg->scna, i_reg->pa, i_reg->region.mem_size); + if (i_reg->region.mem_cap == 0) + return -EINVAL; + + return 0; +} + +/* NOTE: do not clear PA in the teardown process. Error rollback procedure may rely on it. */ +static int teardown_pa(struct obmm_import_region *i_reg) +{ + return 0; +} + +static int teardown_remote_numa(struct obmm_import_region *i_reg, bool force) +{ + int ret; + + pr_info("call external: remove_memory_remote(nid=%d, pa=%#llx, size=%#llx)\n", + i_reg->numa_id, i_reg->pa, i_reg->region.mem_size); + ret = remove_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size); + pr_debug("external called: remove_memory_remote, ret=%pe\n", ERR_PTR(ret)); + /* a full rollback is still possible: check whether this is a full teardown */ + if (ret != 0 && !force) + return ret; + + return ret; +} + +static bool is_numa_base_dist_valid(uint8_t base_dist) +{ + if (base_dist > MAX_NUMA_DIST) { + pr_err("invalid numa base distance %d: out of valid range.\n", base_dist); + return false; + } + if (base_dist != 0 && base_dist <= LOCAL_DISTANCE) { + pr_err("invalid numa base distance %d: reserved values used.\n", base_dist); + return false; + } + return true; +} + +static int obmm_set_numa_distance(unsigned int cna, int nid_remote, uint8_t base_dist) +{ + int nid_local, nid, min_dist, i = 0; + int node_distances[OBMM_MAX_LOCAL_NUMA_NODES]; + int nids[OBMM_MAX_LOCAL_NUMA_NODES]; + + if (!is_numa_base_dist_valid(base_dist)) + return -EINVAL; + + nid_local = ub_mem_get_numa_id(cna); + pr_debug("for cna = %#x, get local node = %d\n", cna, nid_local); + if (nid_local < 0) { + pr_err("failed to set numa distance: bus controller with CNA=%u has nid=%d.", cna, + nid_local); + return -ENODEV; + } + + if (base_dist == 0) + return 0; + + min_dist = __node_distance(nid_local, nid_local); + + for_each_online_local_node(nid) { + nids[i] = nid; + node_distances[i++] = + min(MAX_NUMA_DIST, base_dist + __node_distance(nid_local, nid) - min_dist); + } + + return numa_remote_set_distance(nid_remote, nids, node_distances, i); +} + 
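+/*
+ * setup_remote_numa - bring the imported PA range online as a remote NUMA
+ * node. add_memory_remote() picks (or confirms) the node id, which is saved
+ * in i_reg->numa_id, and the node distance table is then derived from
+ * base_dist relative to the bus controller's local node.
+ */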
+static int setup_remote_numa(struct obmm_import_region *i_reg) +{ + int ret, flags; + + flags = MEMORY_DIRECT_ONLINE; + if (!(i_reg->region.mem_cap & OBMM_MEM_ALLOW_CACHEABLE_MMAP)) { + pr_err("PA range invalid. Cacheable memory cannot be managed with numa.remote: pa=%pa, size=%#llx\n", + &i_reg->pa, i_reg->region.mem_size); + return -EINVAL; + } + pr_info("call external: add_memory_remote(nid=%d, start=0x%llx, size=0x%llx, flags=%d)\n", + i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, flags); + ret = add_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, flags); + pr_debug("external called: add_memory_remote() returned %d\n", ret); + if (ret < 0) { + pr_err("Remote NUMA creation failed: %d\n", ret); + return -EPERM; + } + WARN_ON(i_reg->numa_id != NUMA_NO_NODE && i_reg->numa_id != ret); + i_reg->numa_id = ret; + + ret = obmm_set_numa_distance(i_reg->scna, i_reg->numa_id, i_reg->base_dist); + if (ret < 0) { + pr_err("Failed to set remote numa distance: %pe\n", ERR_PTR(ret)); + goto out_teardown_remote_numa; + } + + return 0; +out_teardown_remote_numa: + WARN_ON(teardown_remote_numa(i_reg, true)); + return ret; +} + +static inline int occupy_addr_range(const struct obmm_import_region *i_reg) +{ + struct obmm_pa_range pa; + + pa.start = i_reg->pa; + pa.end = i_reg->pa + i_reg->region.mem_size - 1; + pa.info.user = OBMM_ADDR_USER_DIRECT_IMPORT; + pa.info.data = (void *)i_reg; + + return occupy_pa_range(&pa); +} + +static int free_addr_range(const struct obmm_import_region *i_reg) +{ + struct obmm_pa_range pa; + + pa.start = i_reg->pa; + pa.end = i_reg->pa + i_reg->region.mem_size - 1; + + return free_pa_range(&pa); +} + +static int prepare_import_memory(struct obmm_import_region *i_reg) +{ + int ret, rollback_ret; + + if (!validate_scna(i_reg->scna)) + return -ENODEV; + + ret = occupy_addr_range(i_reg); + if (ret) + return ret; + + ret = setup_pa(i_reg); + if (ret) + goto out_free_addr_range; + + /* register numa node */ + if (region_numa_remote(&i_reg->region)) { + ret = setup_remote_numa(i_reg); + if (ret) + goto out_teardown_pa; + } else { + i_reg->numa_id = NUMA_NO_NODE; + } + + return 0; + +out_teardown_pa: + rollback_ret = teardown_pa(i_reg); + if (rollback_ret) { + pr_err("failed to teardown PA level mapping on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); + ret = -ENOTRECOVERABLE; + } +out_free_addr_range: + rollback_ret = free_addr_range(i_reg); + if (rollback_ret) { + pr_err("failed to free address range on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); + ret = -ENOTRECOVERABLE; + } + return ret; +} + +static int release_import_memory(struct obmm_import_region *i_reg) +{ + int ret, rollback_ret, old_numa_id; + + if (region_numa_remote(&i_reg->region)) { + old_numa_id = i_reg->numa_id; + ret = teardown_remote_numa(i_reg, false); + if (ret) + goto err_teardown_numa; + } + + ret = flush_import_region(i_reg, 0, i_reg->region.mem_size, OBMM_SHM_CACHE_INVAL); + if (ret) { + pr_err("failed to flush import region, ret=%pe.\n", ERR_PTR(ret)); + goto err_flush; + } + + /* unplug memory */ + ret = teardown_pa(i_reg); + if (ret) { + pr_err("failed to release PA level mapping of region %d, ret=%pe.\n", + i_reg->region.regionid, ERR_PTR(ret)); + goto err_flush; + } + + ret = free_addr_range(i_reg); + if (ret) + goto err_free_addr_range; + + return 0; + +err_free_addr_range: + rollback_ret = setup_pa(i_reg); + if (rollback_ret) { + pr_err("failed to restore PA level mapping, ret=%pe.\n", ERR_PTR(rollback_ret)); + return -ENOTRECOVERABLE; /* rollback cannot proceed */ + } 
+err_flush:
+	if (region_numa_remote(&i_reg->region)) {
+		i_reg->numa_id = old_numa_id;
+
+		rollback_ret = setup_remote_numa(i_reg);
+		if (rollback_ret) {
+			pr_err("failed to restore remote NUMA, ret=%pe.\n", ERR_PTR(rollback_ret));
+			return -ENOTRECOVERABLE; /* rollback cannot proceed */
+		}
+	}
+err_teardown_numa:
+	return ret;
+}
+
+static bool validate_pa_range(phys_addr_t pa, size_t size)
+{
+	/* the PA alignment of OBMM_BASIC_GRANU might be overkill if PAGE_SIZE is not 4K, but
+	 * this is not a common use case for now.
+	 */
+	if (!IS_ALIGNED(pa, OBMM_BASIC_GRANU) || !IS_ALIGNED(size, OBMM_BASIC_GRANU)) {
+		pr_err("PA segments not aligned to OBMM basic granu: base=%#llx, size=%#zx, granularity=%#lx.\n",
+		       pa, size, OBMM_BASIC_GRANU);
+		return false;
+	}
+
+	if (pa == 0) {
+		pr_err("PA=0 unexpected.\n");
+		return false;
+	}
+	if (pa + size < pa) {
+		pr_err("PA range overflow: base=%#llx, size=%#zx.\n", pa, size);
+		return false;
+	}
+
+	return true;
+}
+
+static bool validate_import_region(const struct obmm_import_region *i_reg)
+{
+	/* size and alignment check */
+	if (i_reg->region.mem_size == 0) {
+		pr_err("Zero memory segment size is invalid\n");
+		return false;
+	}
+
+	/* PA as parameter */
+	if (!validate_pa_range(i_reg->pa, i_reg->region.mem_size))
+		return false;
+	return true;
+}
+
+static int import_to_region_flags(unsigned long *region_flags, unsigned long import_flags)
+{
+	*region_flags = 0;
+
+	if (import_flags & (~OBMM_IMPORT_FLAG_MASK)) {
+		pr_err("Invalid import flags %#lx (unknown flags: %#lx).\n", import_flags,
+		       import_flags & (~OBMM_IMPORT_FLAG_MASK));
+		return -EINVAL;
+	}
+	if (!!(import_flags & OBMM_IMPORT_FLAG_ALLOW_MMAP) +
+	    !!(import_flags & OBMM_IMPORT_FLAG_NUMA_REMOTE) != 1) {
+		pr_err("Exactly one of {ALLOW_MMAP, NUMA_REMOTE} must be specified as import flag.\n");
+		return -EINVAL;
+	}
+
+	if (import_flags & OBMM_IMPORT_FLAG_ALLOW_MMAP)
+		*region_flags |= OBMM_REGION_FLAG_ALLOW_MMAP;
+	if (import_flags & OBMM_IMPORT_FLAG_NUMA_REMOTE)
+		*region_flags |= OBMM_REGION_FLAG_NUMA_REMOTE;
+
+	return 0;
+}
+
+static int init_import_region_from_cmd(const struct obmm_cmd_import *param,
+				       struct obmm_import_region *i_reg)
+{
+	int ret;
+	bool config_numa_dist;
+	struct obmm_region *region = &i_reg->region;
+
+	i_reg->region.type = OBMM_IMPORT_REGION;
+	i_reg->region.mem_size = param->length;
+	/* set flags */
+	ret = import_to_region_flags(&region->flags, param->flags);
+	if (ret)
+		return ret;
+
+	i_reg->pa = param->addr;
+
+	i_reg->dcna = param->dcna;
+	i_reg->scna = param->scna;
+	memcpy(i_reg->deid, param->deid, sizeof(i_reg->deid));
+	memcpy(i_reg->seid, param->seid, sizeof(i_reg->seid));
+	i_reg->numa_id = region_numa_remote(&i_reg->region) ?
param->numa_id : NUMA_NO_NODE; + + ret = set_obmm_region_priv(region, param->priv_len, param->priv); + if (ret) + return ret; + + if (!validate_import_region(i_reg)) + return -EINVAL; + + config_numa_dist = region_numa_remote(&i_reg->region); + if (config_numa_dist && !is_numa_base_dist_valid(param->base_dist)) + return -EINVAL; + i_reg->base_dist = param->base_dist; + + /* NOTE: this function initializes the data structure but not the device */ + return 0; +} + +static void print_import_param(const struct obmm_cmd_import *cmd_import) +{ + pr_info("obmm_import: scna=%#x {pa=%#llx length=%#llx} flags=%#llx nid=%d base_dist=%u seid=" + EID_FMT64 " priv_len=%u\n", + cmd_import->scna, cmd_import->addr, cmd_import->length, cmd_import->flags, + cmd_import->numa_id, cmd_import->base_dist, EID_ARGS64_H(cmd_import->seid), + EID_ARGS64_L(cmd_import->seid), cmd_import->priv_len); +} + +int obmm_import(struct obmm_cmd_import *cmd_import) +{ + int retval, rollback_ret, numa_id; + struct obmm_import_region *i_reg; + uint64_t mem_id; + + print_import_param(cmd_import); + /* create obmm region */ + i_reg = kzalloc(sizeof(struct obmm_import_region), GFP_KERNEL); + if (i_reg == NULL) + return -ENOMEM; + + /* arguments to region (logs produced by callee) */ + retval = init_import_region_from_cmd(cmd_import, i_reg); + if (retval) + goto out_free_ireg; + + retval = init_obmm_region(&i_reg->region); + if (retval) + goto out_free_ireg; + + retval = prepare_import_memory(i_reg); + if (retval) { + pr_err("Failed to prepare import memory: ret=%pe\n", ERR_PTR(retval)); + goto out_region_uninit; + } + + numa_id = i_reg->numa_id; + mem_id = (uint64_t)i_reg->region.regionid; + + retval = register_obmm_region(&i_reg->region); + if (retval) { + pr_err("Failed to create import device. ret=%pe\n", ERR_PTR(retval)); + goto out_release_memory; + } + activate_obmm_region(&i_reg->region); + + /* pass back output value */ + cmd_import->numa_id = numa_id; + cmd_import->mem_id = mem_id; + + pr_info("%s: mem_id=%llu online\n", __func__, cmd_import->mem_id); + return 0; + +out_release_memory: + rollback_ret = release_import_memory(i_reg); + if (rollback_ret) + pr_warn("Failed to release import memory on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); +out_region_uninit: + uninit_obmm_region(&i_reg->region); +out_free_ireg: + kfree(i_reg); + return retval; +} + +/* NOTE: the operation order is not precisely the reverse order of initialization for the ease of + * error rollback. Please make careful evaluation on modifications. 
+ */ +int obmm_unimport(const struct obmm_cmd_unimport *cmd_unimport) +{ + int ret; + struct obmm_region *reg; + struct obmm_import_region *i_reg; + + pr_info("%s: mem_id=%llu, flags=%#llx.\n", __func__, cmd_unimport->mem_id, + cmd_unimport->flags); + if (!validate_obmm_mem_id(cmd_unimport->mem_id)) + return -ENOENT; + if (cmd_unimport->flags & (~OBMM_UNIMPORT_FLAG_MASK)) { + pr_err("%s: invalid flags %#llx.\n", __func__, cmd_unimport->flags); + return -EINVAL; + } + + reg = search_deactivate_obmm_region(cmd_unimport->mem_id); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + if (reg->type != OBMM_IMPORT_REGION) { + pr_err("%s: mem_id=%llu region type mismatched.\n", __func__, cmd_unimport->mem_id); + ret = -EINVAL; + goto err_unimport; + } + i_reg = container_of(reg, struct obmm_import_region, region); + ret = release_import_memory(i_reg); + if (ret) + goto err_unimport; + + deregister_obmm_region(reg); + uninit_obmm_region(reg); + kfree(i_reg); + + pr_info("%s: mem_id=%llu completed.\n", __func__, cmd_unimport->mem_id); + return 0; + +err_unimport: + activate_obmm_region(reg); + pr_err("%s: mem_id=%llu failed, %pe.\n", __func__, cmd_unimport->mem_id, ERR_PTR(ret)); + return ret; +} + +int flush_import_region(struct obmm_import_region *i_reg, unsigned long offset, + unsigned long length, unsigned long cache_ops) +{ + int ret; + + ret = flush_cache_by_pa(i_reg->pa + offset, length, cache_ops); + if (ret) + return ret; + + if (cache_ops == OBMM_SHM_CACHE_WB_INVAL || cache_ops == OBMM_SHM_CACHE_WB_ONLY) + return ub_write_queue_flush(i_reg->scna); + return 0; +} + +int map_import_region(struct vm_area_struct *vma, struct obmm_import_region *i_reg, + enum obmm_mmap_granu mmap_granu) +{ + unsigned long pfn, size; + + size = vma->vm_end - vma->vm_start; + pfn = __phys_to_pfn(i_reg->pa) + vma->vm_pgoff; + if (mmap_granu == OBMM_MMAP_GRANU_PAGE) + return remap_pfn_range(vma, vma->vm_start, pfn, size, vma->vm_page_prot); + else if (mmap_granu == OBMM_MMAP_GRANU_PMD) + return remap_pfn_range_try_pmd(vma, vma->vm_start, pfn, size, vma->vm_page_prot); + pr_err("invalid mmap granu %d\n", mmap_granu); + + return -EINVAL; +} + +int get_pa_detail_import(const struct obmm_import_region *i_reg, unsigned long pa, + struct obmm_ext_addr *ext_addr) +{ + if (pa < i_reg->pa || pa >= i_reg->pa + i_reg->region.mem_size) + return -EFAULT; + + ext_addr->region_type = OBMM_IMPORT_REGION; + ext_addr->regionid = i_reg->region.regionid; + ext_addr->offset = pa - i_reg->pa; + ext_addr->tid = 0; + ext_addr->uba = 0; + ext_addr->numa_id = i_reg->numa_id; + ext_addr->pa = pa; + + return 0; +} + +int get_offset_detail_import(const struct obmm_import_region *i_reg, unsigned long offset, + struct obmm_ext_addr *ext_addr) +{ + if (offset >= i_reg->region.mem_size) { + pr_err("%s: invalid offset 0x%lx\n", __func__, offset); + return -EINVAL; + } + + ext_addr->region_type = i_reg->region.type; + ext_addr->regionid = i_reg->region.regionid; + ext_addr->offset = offset; + ext_addr->tid = 0; + ext_addr->uba = 0; + ext_addr->pa = i_reg->pa + offset; + ext_addr->numa_id = i_reg->numa_id; + + return 0; +} diff --git a/drivers/ub/obmm/obmm_import.h b/drivers/ub/obmm/obmm_import.h new file mode 100644 index 000000000000..e24cb23fce33 --- /dev/null +++ b/drivers/ub/obmm/obmm_import.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+ */
+#ifndef OBMM_IMPORT_H
+#define OBMM_IMPORT_H
+
+#include "obmm_core.h"
+
+int obmm_import(struct obmm_cmd_import *cmd_import);
+int obmm_unimport(const struct obmm_cmd_unimport *cmd_unimport);
+
+int flush_import_region(struct obmm_import_region *i_reg, unsigned long offset,
+			unsigned long length, unsigned long cache_ops);
+int map_import_region(struct vm_area_struct *vma, struct obmm_import_region *i_reg,
+		      enum obmm_mmap_granu mmap_granu);
+
+int get_pa_detail_import(const struct obmm_import_region *i_reg, unsigned long pa,
+			 struct obmm_ext_addr *ext_addr);
+
+int get_offset_detail_import(const struct obmm_import_region *i_reg, unsigned long offset,
+			     struct obmm_ext_addr *ext_addr);
+
+#endif
diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h
index 72c7dd5d6795..b369e92a860a 100644
--- a/include/uapi/ub/obmm.h
+++ b/include/uapi/ub/obmm.h
@@ -81,9 +81,40 @@ struct obmm_cmd_addr_query {
 	__u64 pa;
 } __attribute__((aligned(8)));
+#define OBMM_IMPORT_FLAG_ALLOW_MMAP 0x1UL
+#define OBMM_IMPORT_FLAG_NUMA_REMOTE 0x4UL
+#define OBMM_IMPORT_FLAG_MASK (OBMM_IMPORT_FLAG_ALLOW_MMAP | \
+			       OBMM_IMPORT_FLAG_NUMA_REMOTE)
+
+
+struct obmm_cmd_import {
+	__u64 flags;
+	__u64 mem_id;
+	__u64 addr;
+	__u64 length;
+	__u32 tokenid;
+	__u32 scna;
+	__u32 dcna;
+	__s32 numa_id;
+	__u16 priv_len;
+	__u8 base_dist;
+	__u8 deid[16];
+	__u8 seid[16];
+	const void *priv;
+} __attribute__((aligned(8)));
+
+#define OBMM_UNIMPORT_FLAG_MASK (0UL)
+
+struct obmm_cmd_unimport {
+	__u64 mem_id;
+	__u64 flags;
+} __attribute__((aligned(8)));
+
 #define OBMM_CMD_EXPORT _IOWR('x', 0, struct obmm_cmd_export)
+#define OBMM_CMD_IMPORT _IOWR('x', 1, struct obmm_cmd_import)
 #define OBMM_CMD_UNEXPORT _IOW('x', 2, struct obmm_cmd_unexport)
+#define OBMM_CMD_UNIMPORT _IOW('x', 3, struct obmm_cmd_unimport)
 #define OBMM_CMD_ADDR_QUERY _IOWR('x', 4, struct obmm_cmd_addr_query)
 #define OBMM_CMD_EXPORT_PID _IOWR('x', 5, struct obmm_cmd_export_pid)
-- 
Gitee

From 28cfdbc41a8be38514ad187df0399c36160abe16 Mon Sep 17 00:00:00 2001
From: Li Ruilin
Date: Fri, 7 Nov 2025 17:45:41 +0800
Subject: [PATCH 17/48] obmm: Add pre-import support for performance
 optimization

commit 0f73521b11e7af9630ffbe53178282f6f81ce7e6 openEuler

When remote memory is introduced through NUMA, creating the NUMA node
takes a significant amount of time. Pre-import moves that node creation
off the critical path: it does not depend on the full information
generated during memory export, so it can be finished before the export
completes. When the memory is actually brought online, the range is
"injected" into the pre-created node, which accelerates the software
flow on the critical path.

The physical address segment serves as the matching index. To take the
accelerated path, obmm_import must be called with the
*OBMM_IMPORT_FLAG_PREIMPORT* flag and its parameters must hit the
index: the physical address segment passed to obmm_import must be a
subset of a pre-imported address segment.

A /proc/obmm/preimport_info interface is also provided for diagnostics.
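An illustrative two-step flow from user space, as a minimal sketch: the
device path /dev/obmm is an assumption, struct obmm_cmd_preimport field
names follow the kernel-side usage in this patch, and the base/length
values are placeholders supplied by the caller:

    #include <fcntl.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <ub/obmm.h>

    static int preimport_then_import(unsigned long long base, unsigned long long len,
                                     unsigned int scna)
    {
        struct obmm_cmd_preimport pre;
        struct obmm_cmd_import imp;
        int fd, ret;

        fd = open("/dev/obmm", O_RDWR);	/* hypothetical device node */
        if (fd < 0)
            return -1;

        /* 1. early, off the critical path: pre-create the remote NUMA node */
        memset(&pre, 0, sizeof(pre));
        pre.pa = base;		/* memory-block aligned */
        pre.length = len;	/* memory-block aligned, non-zero */
        pre.scna = scna;	/* mandatory, used to set up NUMA distance */
        pre.numa_id = -1;	/* let the kernel pick the node */
        ret = ioctl(fd, OBMM_CMD_DECLARE_PREIMPORT, &pre);
        if (ret)
            goto out;

        /* 2. later, on the critical path: import a subset of the declared range */
        memset(&imp, 0, sizeof(imp));
        imp.flags = OBMM_IMPORT_FLAG_NUMA_REMOTE | OBMM_IMPORT_FLAG_PREIMPORT;
        imp.addr = base;	/* must hit the preimport index */
        imp.length = len / 2;	/* aligned subset of [base, base + len) */
        imp.scna = scna;	/* datapath must match the declaration */
        ret = ioctl(fd, OBMM_CMD_IMPORT, &imp);
        /* on success imp.numa_id and imp.mem_id are filled by the kernel */
    out:
        close(fd);
        return ret;
    }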
Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 4 +- drivers/ub/obmm/obmm_addr_check.c | 10 +- drivers/ub/obmm/obmm_addr_check.h | 1 + drivers/ub/obmm/obmm_core.c | 41 +++ drivers/ub/obmm/obmm_core.h | 8 + drivers/ub/obmm/obmm_import.c | 141 +++++---- drivers/ub/obmm/obmm_preimport.c | 333 ++++++++++++++++++++ drivers/ub/obmm/obmm_preimport.h | 56 ++++ drivers/ub/obmm/obmm_preimport_prefilled.c | 342 +++++++++++++++++++++ include/uapi/ub/obmm.h | 21 ++ 10 files changed, 892 insertions(+), 65 deletions(-) create mode 100644 drivers/ub/obmm/obmm_preimport.c create mode 100644 drivers/ub/obmm/obmm_preimport.h create mode 100644 drivers/ub/obmm/obmm_preimport_prefilled.c diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 472d99ec31b4..0b7410251bf5 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -4,8 +4,8 @@ obmm-y := obmm_core.o \ obmm_cache.o obmm_import.o \ obmm_export_from_user.o conti_mem_allocator.o \ - obmm_export.o obmm_export_from_pool.o \ + obmm_export.o obmm_export_from_pool.o obmm_preimport.o \ ubmempool_allocator.o obmm_export_region_ops.o \ - obmm_addr_check.o + obmm_addr_check.o obmm_preimport_prefilled.o obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/obmm_addr_check.c b/drivers/ub/obmm/obmm_addr_check.c index d4c263e2f933..09085d008ca8 100644 --- a/drivers/ub/obmm/obmm_addr_check.c +++ b/drivers/ub/obmm/obmm_addr_check.c @@ -51,7 +51,9 @@ int occupy_pa_range(const struct obmm_pa_range *pa_range) return ret; } pr_debug("pa_check: add [%pa,%pa]->{user=%s,data=%p}\n", &pa_range->start, &pa_range->end, - "direct_import", pa_range->info.data); + pa_range->info.user == OBMM_ADDR_USER_DIRECT_IMPORT ? + "direct_import" : "preimport", + pa_range->info.data); return 0; } @@ -75,7 +77,8 @@ int free_pa_range(const struct obmm_pa_range *pa_range) pr_err("BUG: PA range does not fully match.\n"); ret = -ENOTRECOVERABLE; } - user = "import"; + user = ((struct obmm_pa_range *)entry)->info.user == OBMM_ADDR_USER_DIRECT_IMPORT ? + "import" : "preimport"; pr_debug("pa_check: del [%pa,?]->{user=%s,data=%p}\n", &pa_range->start, user, ((struct obmm_pa_range *)entry)->info.data); kfree(entry); @@ -123,7 +126,8 @@ int update_pa_range(phys_addr_t addr, const struct obmm_addr_info *info) if (!retrieved) return -EFAULT; - pr_debug("pa_check: update [%pa,?]->{user=%s,data=%p}\n", &addr, "direct_import", + pr_debug("pa_check: update [%pa,?]->{user=%s,data=%p}\n", &addr, + info->user == OBMM_ADDR_USER_DIRECT_IMPORT ? 
"direct_import" : "preimport", info->data); return 0; } diff --git a/drivers/ub/obmm/obmm_addr_check.h b/drivers/ub/obmm/obmm_addr_check.h index 9acb4db2773b..d68586cad455 100644 --- a/drivers/ub/obmm/obmm_addr_check.h +++ b/drivers/ub/obmm/obmm_addr_check.h @@ -9,6 +9,7 @@ enum obmm_addr_user { OBMM_ADDR_USER_DIRECT_IMPORT, + OBMM_ADDR_USER_PREIMPORT, }; struct obmm_addr_info { enum obmm_addr_user user; diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index 2ebed5b7277b..8812a62b2a89 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -24,6 +24,7 @@ #include "obmm_export_region_ops.h" #include "ubmempool_allocator.h" #include "obmm_import.h" +#include "obmm_preimport.h" #include "obmm_addr_check.h" #include "obmm_export.h" #include "obmm_core.h" @@ -406,6 +407,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, struct obmm_cmd_unimport unimport; struct obmm_cmd_addr_query query; struct obmm_cmd_export_pid export_pid; + struct obmm_cmd_preimport preimport; } cmd_param; switch (cmd) { @@ -505,6 +507,35 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, return -EFAULT; } } break; + case OBMM_CMD_DECLARE_PREIMPORT: { + ret = (int)copy_from_user(&cmd_param.preimport, (void __user *)arg, + sizeof(struct obmm_cmd_preimport)); + if (ret) { + pr_err("failed to load preimport argument"); + return -EFAULT; + } + + ret = obmm_preimport(&cmd_param.preimport); + if (ret) + return ret; + + ret = (int)copy_to_user((void __user *)arg, &cmd_param.preimport, + sizeof(struct obmm_cmd_preimport)); + if (ret) { + pr_err("failed to write preimport result"); + return -EFAULT; + } + } break; + case OBMM_CMD_UNDECLARE_PREIMPORT: { + ret = (int)copy_from_user(&cmd_param.preimport, (void __user *)arg, + sizeof(struct obmm_cmd_preimport)); + if (ret) { + pr_err("failed to load preimport argument"); + return -EFAULT; + } + + ret = obmm_unpreimport(&cmd_param.preimport); + } break; default: ret = -ENOTTY; } @@ -544,9 +575,18 @@ static int __init obmm_init(void) module_addr_check_init(); + ret = module_preimport_init(); + if (ret) { + pr_err("failed to initialize preimport range manager. 
ret=%pe.\n", ERR_PTR(ret)); + goto out_addr_check_exit; + } + pr_info("obmm_module: init completed\n"); return ret; +out_addr_check_exit: + module_addr_check_exit(); + misc_deregister(&obmm_dev_handle); out_allocator_exit: ubmempool_allocator_exit(); return ret; @@ -556,6 +596,7 @@ static void __exit obmm_exit(void) { pr_info("obmm_module: exit started\n"); + module_preimport_exit(); module_addr_check_exit(); misc_deregister(&obmm_dev_handle); ubmempool_allocator_exit(); diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index 18d3406123bc..5420bebba615 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -25,6 +25,7 @@ #endif #define pr_fmt(fmt) "OBMM: " fmt +#define EID_BYTES 16 #define EID_FMT64 "%#llx:%#llx" #define EID_ALIGNED_FMT64 "%#0*llx:%#0*llx" @@ -55,6 +56,7 @@ enum obmm_mmap_granu { #define OBMM_REGION_FLAG_ALLOW_MMAP 0x2 #define OBMM_REGION_FLAG_MEMORY_FROM_USER 0x4 #define OBMM_REGION_FLAG_FAST_ALLOC 0x8 +#define OBMM_REGION_FLAG_PREIMPORT 0x10 #define OBMM_INVALID_REGIONID 0 #define OBMM_MIN_VALID_REGIONID 1 @@ -110,6 +112,10 @@ static inline bool region_memory_from_user(const struct obmm_region *reg) { return reg->flags & OBMM_REGION_FLAG_MEMORY_FROM_USER; } +static inline bool region_preimport(const struct obmm_region *reg) +{ + return reg->flags & OBMM_REGION_FLAG_PREIMPORT; +} static inline bool region_fast_alloc(const struct obmm_region *reg) { return reg->flags & OBMM_REGION_FLAG_FAST_ALLOC; @@ -130,6 +136,8 @@ struct obmm_import_region { */ u8 base_dist; + /* handle to manage associated preimport range */ + void *preimport_handle; u8 deid[16]; u8 seid[16]; }; diff --git a/drivers/ub/obmm/obmm_import.c b/drivers/ub/obmm/obmm_import.c index 0fd8a969c262..55ef257a3c01 100644 --- a/drivers/ub/obmm/obmm_import.c +++ b/drivers/ub/obmm/obmm_import.c @@ -13,8 +13,19 @@ #include "obmm_core.h" #include "obmm_cache.h" #include "obmm_import.h" +#include "obmm_preimport.h" #include "obmm_addr_check.h" +static void set_import_region_datapath(const struct obmm_import_region *i_reg, + struct obmm_datapath *datapath) +{ + datapath->scna = i_reg->scna; + datapath->dcna = i_reg->dcna; + /* shallow copy */ + datapath->seid = i_reg->seid; + datapath->deid = i_reg->deid; +} + static unsigned long get_pa_range_mem_cap(u32 scna, phys_addr_t pa, size_t size) { phys_addr_t pa_start = pa; @@ -34,23 +45,40 @@ static unsigned long get_pa_range_mem_cap(u32 scna, phys_addr_t pa, size_t size) static int setup_pa(struct obmm_import_region *i_reg) { + phys_addr_t start, end; + struct obmm_datapath datapath; + i_reg->region.mem_cap = get_pa_range_mem_cap(i_reg->scna, i_reg->pa, i_reg->region.mem_size); if (i_reg->region.mem_cap == 0) return -EINVAL; - return 0; + if (!region_preimport(&i_reg->region)) + return 0; + + start = i_reg->pa; + end = i_reg->pa + i_reg->region.mem_size - 1; + set_import_region_datapath(i_reg, &datapath); + + return preimport_commit_prefilled(start, end, &datapath, &i_reg->numa_id, + &i_reg->preimport_handle); } /* NOTE: do not clear PA in the teardown process. Error rollback procedure may rely on it. 
*/ static int teardown_pa(struct obmm_import_region *i_reg) { - return 0; + bool preimport = region_preimport(&i_reg->region); + + if (!preimport) + return 0; + /* prefilled and preimport */ + return preimport_uncommit_prefilled(i_reg->preimport_handle, i_reg->pa, + i_reg->pa + i_reg->region.mem_size - 1); } static int teardown_remote_numa(struct obmm_import_region *i_reg, bool force) { - int ret; + int ret, this_ret; pr_info("call external: remove_memory_remote(nid=%d, pa=%#llx, size=%#llx)\n", i_reg->numa_id, i_reg->pa, i_reg->region.mem_size); @@ -60,63 +88,36 @@ static int teardown_remote_numa(struct obmm_import_region *i_reg, bool force) if (ret != 0 && !force) return ret; - return ret; -} - -static bool is_numa_base_dist_valid(uint8_t base_dist) -{ - if (base_dist > MAX_NUMA_DIST) { - pr_err("invalid numa base distance %d: out of valid range.\n", base_dist); - return false; - } - if (base_dist != 0 && base_dist <= LOCAL_DISTANCE) { - pr_err("invalid numa base distance %d: reserved values used.\n", base_dist); - return false; - } - return true; -} - -static int obmm_set_numa_distance(unsigned int cna, int nid_remote, uint8_t base_dist) -{ - int nid_local, nid, min_dist, i = 0; - int node_distances[OBMM_MAX_LOCAL_NUMA_NODES]; - int nids[OBMM_MAX_LOCAL_NUMA_NODES]; - - if (!is_numa_base_dist_valid(base_dist)) - return -EINVAL; - - nid_local = ub_mem_get_numa_id(cna); - pr_debug("for cna = %#x, get local node = %d\n", cna, nid_local); - if (nid_local < 0) { - pr_err("failed to set numa distance: bus controller with CNA=%u has nid=%d.", cna, - nid_local); - return -ENODEV; - } - - if (base_dist == 0) - return 0; - - min_dist = __node_distance(nid_local, nid_local); - - for_each_online_local_node(nid) { - nids[i] = nid; - node_distances[i++] = - min(MAX_NUMA_DIST, base_dist + __node_distance(nid_local, nid) - min_dist); + if (region_preimport(&i_reg->region)) { + pr_info("call external: add_memory_remote(nid=%d, start=0x%llx, size=0x%llx, flags=MEMORY_KEEP_ISOLATED)\n", + i_reg->numa_id, i_reg->pa, i_reg->region.mem_size); + this_ret = add_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, + MEMORY_KEEP_ISOLATED); + pr_debug("external called: add_memory_remote() returned %d\n", this_ret); + if (this_ret == NUMA_NO_NODE) { + pr_err("failed to reset preimport memory.\n"); + ret = -ENOTRECOVERABLE; + } } - return numa_remote_set_distance(nid_remote, nids, node_distances, i); + return ret; } static int setup_remote_numa(struct obmm_import_region *i_reg) { int ret, flags; - flags = MEMORY_DIRECT_ONLINE; + if (region_preimport(&i_reg->region)) + flags = 0; + else + flags = MEMORY_DIRECT_ONLINE; + if (!(i_reg->region.mem_cap & OBMM_MEM_ALLOW_CACHEABLE_MMAP)) { pr_err("PA range invalid. 
Cacheable memory cannot be managed with numa.remote: pa=%pa, size=%#llx\n", &i_reg->pa, i_reg->region.mem_size); return -EINVAL; } + pr_info("call external: add_memory_remote(nid=%d, start=0x%llx, size=0x%llx, flags=%d)\n", i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, flags); ret = add_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, flags); @@ -128,10 +129,12 @@ static int setup_remote_numa(struct obmm_import_region *i_reg) WARN_ON(i_reg->numa_id != NUMA_NO_NODE && i_reg->numa_id != ret); i_reg->numa_id = ret; - ret = obmm_set_numa_distance(i_reg->scna, i_reg->numa_id, i_reg->base_dist); - if (ret < 0) { - pr_err("Failed to set remote numa distance: %pe\n", ERR_PTR(ret)); - goto out_teardown_remote_numa; + if (!region_preimport(&i_reg->region)) { + ret = obmm_set_numa_distance(i_reg->scna, i_reg->numa_id, i_reg->base_dist); + if (ret < 0) { + pr_err("Failed to set remote numa distance: %pe\n", ERR_PTR(ret)); + goto out_teardown_remote_numa; + } } return 0; @@ -144,22 +147,30 @@ static inline int occupy_addr_range(const struct obmm_import_region *i_reg) { struct obmm_pa_range pa; - pa.start = i_reg->pa; - pa.end = i_reg->pa + i_reg->region.mem_size - 1; - pa.info.user = OBMM_ADDR_USER_DIRECT_IMPORT; - pa.info.data = (void *)i_reg; + if (!region_preimport(&i_reg->region)) { + pa.start = i_reg->pa; + pa.end = i_reg->pa + i_reg->region.mem_size - 1; + pa.info.user = OBMM_ADDR_USER_DIRECT_IMPORT; + pa.info.data = (void *)i_reg; + return occupy_pa_range(&pa); + } - return occupy_pa_range(&pa); + /* preimport + decoder_prefilled: address conflicts managed by its perimport range */ + return 0; } static int free_addr_range(const struct obmm_import_region *i_reg) { struct obmm_pa_range pa; - pa.start = i_reg->pa; - pa.end = i_reg->pa + i_reg->region.mem_size - 1; + if (!region_preimport(&i_reg->region)) { + pa.start = i_reg->pa; + pa.end = i_reg->pa + i_reg->region.mem_size - 1; + return free_pa_range(&pa); + } - return free_pa_range(&pa); + /* preimport + decoder_prefilled: address conflicts managed by its perimport range */ + return 0; } static int prepare_import_memory(struct obmm_import_region *i_reg) @@ -281,12 +292,15 @@ static bool validate_pa_range(phys_addr_t pa, size_t size) static bool validate_import_region(const struct obmm_import_region *i_reg) { + bool preimport; + /* size and alignment check */ if (i_reg->region.mem_size == 0) { pr_err("Zero memory segment size is invalid\n"); return false; } + preimport = region_preimport(&i_reg->region); /* PA as parameter */ if (!validate_pa_range(i_reg->pa, i_reg->region.mem_size)) return false; @@ -307,9 +321,16 @@ static int import_to_region_flags(unsigned long *region_flags, unsigned long imp pr_err("Exactly one of {ALLOW_MMAP, NUMA_REMOTE} must be specified as import flag.\n"); return -EINVAL; } + if ((import_flags & OBMM_IMPORT_FLAG_PREIMPORT) && + !(import_flags & OBMM_IMPORT_FLAG_NUMA_REMOTE)) { + pr_err("Preimport must be used with NUMA_REMOTE.\n"); + return -EINVAL; + } if (import_flags & OBMM_IMPORT_FLAG_ALLOW_MMAP) *region_flags |= OBMM_REGION_FLAG_ALLOW_MMAP; + if (import_flags & OBMM_IMPORT_FLAG_PREIMPORT) + *region_flags |= OBMM_REGION_FLAG_PREIMPORT; if (import_flags & OBMM_IMPORT_FLAG_NUMA_REMOTE) *region_flags |= OBMM_REGION_FLAG_NUMA_REMOTE; @@ -345,7 +366,7 @@ static int init_import_region_from_cmd(const struct obmm_cmd_import *param, if (!validate_import_region(i_reg)) return -EINVAL; - config_numa_dist = region_numa_remote(&i_reg->region); + config_numa_dist = region_numa_remote(&i_reg->region) && 
!region_preimport(&i_reg->region); if (config_numa_dist && !is_numa_base_dist_valid(param->base_dist)) return -EINVAL; i_reg->base_dist = param->base_dist; diff --git a/drivers/ub/obmm/obmm_preimport.c b/drivers/ub/obmm/obmm_preimport.c new file mode 100644 index 000000000000..76d63159af19 --- /dev/null +++ b/drivers/ub/obmm/obmm_preimport.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include + +#include "obmm_preimport.h" +#include "obmm_import.h" + +static char not_ready_dummy; +void *not_ready_ptr = ¬_ready_dummy; + +static DEFINE_MUTEX(list_mutex); +static LIST_HEAD(pr_list); + +bool is_numa_base_dist_valid(uint8_t base_dist) +{ + if (base_dist > MAX_NUMA_DIST) { + pr_err("invalid numa base distance %d: out of valid range.\n", base_dist); + return false; + } + if (base_dist != 0 && base_dist <= LOCAL_DISTANCE) { + pr_err("invalid numa base distance %d: reserved values used.\n", base_dist); + return false; + } + return true; +} + +int obmm_set_numa_distance(unsigned int cna, int nid_remote, uint8_t base_dist) +{ + int nid_local, nid, min_dist, i = 0; + int node_distances[OBMM_MAX_LOCAL_NUMA_NODES]; + int nids[OBMM_MAX_LOCAL_NUMA_NODES]; + + if (!is_numa_base_dist_valid(base_dist)) + return -EINVAL; + + nid_local = ub_mem_get_numa_id(cna); + pr_debug("for cna = %#x, get local node = %d\n", cna, nid_local); + if (nid_local < 0) { + pr_err("failed to set numa distance: bus controller with CNA=%u has nid=%d.", cna, + nid_local); + return -ENODEV; + } + + if (base_dist == 0) + return 0; + + min_dist = __node_distance(nid_local, nid_local); + + for_each_online_local_node(nid) { + nids[i] = nid; + node_distances[i++] = + min(MAX_NUMA_DIST, base_dist + __node_distance(nid_local, nid) - min_dist); + } + + return numa_remote_set_distance(nid_remote, nids, node_distances, i); +} + +int check_preimport_cmd_common(const struct obmm_cmd_preimport *cmd) +{ + /* OBMM_BASIC_GRANU is always smaller than or equal to memory_block_size_bytes(). No need + * to check for OBMM_BASIC_GRANU here. + */ + if (cmd->length % memory_block_size_bytes() != 0) { + pr_err("preimport length not aligned to %#lx: %#llx + %#llx.\n", + memory_block_size_bytes(), cmd->pa, cmd->length); + return -EINVAL; + } + if (cmd->pa % memory_block_size_bytes()) { + pr_err("preimport base PA not aligned to %#lx: %#llx + %#llx.\n", + memory_block_size_bytes(), cmd->pa, cmd->length); + return -EINVAL; + } + if (cmd->length > ULLONG_MAX - cmd->pa) { + pr_err("preimport PA range overflowed: %#llx + %#llx.\n", cmd->pa, cmd->length); + return -EINVAL; + } + if (cmd->length == 0) { + pr_err("invalid preimport length 0.\n"); + return -EINVAL; + } + if (cmd->flags & ~OBMM_PREIMPORT_FLAG_MASK) { + pr_err("undefined preimport flags specified in %#llx.\n", cmd->flags); + return -EINVAL; + } + /* scna is mandatory parameter, always required to initialize NUMA distance */ + if (!validate_scna(cmd->scna)) + return -ENODEV; + if (!is_numa_base_dist_valid(cmd->base_dist)) + return -EINVAL; + return 0; +} + +int preimport_prepare_common(struct preimport_range *pr, uint8_t base_dist) +{ + int ret, ret_err; + + if (!ub_memory_validate_pa(pr->scna, pr->start, pr->end, true)) { + pr_err("PA range invalid. 
Cacheable memory cannot be managed with preimport: pa=%pa, size=%#llx\n", + &pr->start, pr->end - pr->start + 1); + return -EINVAL; + } + + pr_info("call external: add_memory_remote(nid=%d, start=%pa, size=%#llx, flags=MEMORY_KEEP_ISOLATED)\n", + pr->numa_id, &pr->start, pr->end - pr->start + 1); + ret = add_memory_remote(pr->numa_id, pr->start, pr->end - pr->start + 1, + MEMORY_KEEP_ISOLATED); + pr_debug("external called: add_memory_remote() returned %d\n", ret); + if (ret < 0) + return -EPERM; + WARN_ON(pr->numa_id != NUMA_NO_NODE && pr->numa_id != ret); + pr->numa_id = ret; + + ret = obmm_set_numa_distance(pr->scna, pr->numa_id, base_dist); + if (ret < 0) { + pr_err("Failed to set numa distance for remote numa: %pe\n", ERR_PTR(ret)); + goto err_remove_memory_remote; + } + + mutex_lock(&list_mutex); + list_add(&pr->node, &pr_list); + mutex_unlock(&list_mutex); + + return 0; + +err_remove_memory_remote: + pr_info("call external: remove_memory_remote(nid=%d, start=%pa, size=%#llx)\n", pr->numa_id, + &pr->start, pr->end - pr->start + 1); + ret_err = remove_memory_remote(pr->numa_id, pr->start, pr->end - pr->start + 1); + pr_debug("external called: remove_memory_remote() returned %d\n", ret_err); + return ret; +} + +int preimport_release_common(struct preimport_range *pr, bool force) +{ + int ret; + + pr_info("call external: remove_memory_remote(nid=%d, start=%pa, size=%#llx)\n", pr->numa_id, + &pr->start, pr->end - pr->start + 1); + ret = remove_memory_remote(pr->numa_id, pr->start, pr->end - pr->start + 1); + pr_debug("external called: remove_memory_remote() returned %pe\n", ERR_PTR(ret)); + if (ret && !force) + return ret; + + mutex_lock(&list_mutex); + list_del(&pr->node); + mutex_unlock(&list_mutex); + return ret; +} + +int check_preimport_datapath_common(const struct preimport_range *pr, + const struct obmm_datapath *datapath) +{ + if (pr->scna != datapath->scna || pr->dcna != datapath->dcna) { + pr_err("scna-dcna pair mismatch: <%#x, %#x> used in import; <%#x, %#x> in preimport.\n", + datapath->scna, datapath->dcna, pr->scna, pr->dcna); + return -EINVAL; + } + if (memcmp(pr->seid, datapath->seid, EID_BYTES)) { + pr_err("seid mismatch: " EID_FMT64 " used in import; " EID_FMT64 " in preimport.\n", + EID_ARGS64_H(datapath->seid), EID_ARGS64_L(datapath->seid), + EID_ARGS64_H(pr->seid), EID_ARGS64_L(pr->seid)); + return -EINVAL; + } + if (memcmp(pr->deid, datapath->deid, EID_BYTES)) { + pr_err("deid mismatch: " EID_FMT64 " used in import; " EID_FMT64 " in preimport.\n", + EID_ARGS64_H(datapath->deid), EID_ARGS64_L(datapath->deid), + EID_ARGS64_H(pr->deid), EID_ARGS64_L(pr->deid)); + return -EINVAL; + } + + return 0; +} + +static void print_preimport_param(const struct obmm_cmd_preimport *cmd) +{ + pr_info("obmm_preimport: pa=%#llx length=%#llx scna=%#x dcna=%#x flags=%#llx nid=%d base_dist=%u deid=" + EID_FMT64 " seid=" EID_FMT64 " priv_len=%u\n", + cmd->pa, cmd->length, cmd->scna, cmd->dcna, cmd->flags, cmd->numa_id, + cmd->base_dist, EID_ARGS64_H(cmd->deid), EID_ARGS64_L(cmd->deid), + EID_ARGS64_H(cmd->seid), EID_ARGS64_L(cmd->seid), cmd->priv_len); +} + +int obmm_preimport(struct obmm_cmd_preimport *cmd) +{ + int ret; + + print_preimport_param(cmd); + if (!try_module_get(THIS_MODULE)) { + pr_err("Module is dying. 
Reject all preimport requests\n"); + return -EPERM; + } + + ret = preimport_prepare_prefilled(cmd); + + if (ret) + module_put(THIS_MODULE); + else + pr_info("%s: preimport on nid=%d finished.\n", __func__, cmd->numa_id); + return ret; +} + +static int check_unpreimport_cmd_common(const struct obmm_cmd_preimport *cmd) +{ + if (cmd->flags & ~OBMM_UNPREIMPORT_FLAG_MASK) { + pr_err("undefined unpreimport flags specified in %#llx.\n", cmd->flags); + return -EINVAL; + } + return 0; +} + +static void print_unpreimport_param(const struct obmm_cmd_preimport *cmd) +{ + pr_info("obmm_unpreimport: pa=%#llx, length=%#llx.\n", cmd->pa, cmd->length); +} + +int obmm_unpreimport(struct obmm_cmd_preimport *cmd) +{ + int ret; + + print_unpreimport_param(cmd); + ret = check_unpreimport_cmd_common(cmd); + if (ret) + return ret; + + ret = preimport_release_prefilled(cmd->pa, cmd->pa + cmd->length - 1); + if (ret == 0) { + module_put(THIS_MODULE); + pr_info("%s: unpreimport on nid=%d finished.\n", __func__, cmd->numa_id); + } + + return ret; +} + +static void *preimp_info_seq_start(struct seq_file *m __always_unused, loff_t *pos) +{ + mutex_lock(&list_mutex); + /* Shift the position by 1 to make room for the table header. */ + if (*pos == 0) + return SEQ_START_TOKEN; + return seq_list_start(&pr_list, *pos - 1); +} + +static void *preimp_info_seq_next(struct seq_file *m __always_unused, void *v, loff_t *pos) +{ + /* SEQ_START_TOKEN is a reserved token that stands in for the dummy list head. The next + * element after the dummy head is the first real element. + */ + if (v == SEQ_START_TOKEN) + v = &pr_list; + return seq_list_next(v, &pr_list, pos); +} + +static void preimp_info_seq_stop(struct seq_file *m __always_unused, void *v __always_unused) +{ + mutex_unlock(&list_mutex); +} + +#define PA_WIDTH 16 +#define CNA_WIDTH 8 +#define HALF_EID_WIDTH 18 +#define FULL_EID_WIDTH (2 * HALF_EID_WIDTH + 1) +#define NID_WIDTH 3 +static int preimp_info_seq_show(struct seq_file *m, void *v) +{ + const struct preimport_range *pr = list_entry(v, struct preimport_range, node); + + if (v == SEQ_START_TOKEN) + seq_printf(m, "%-*s - %-*s : %-*s %-*s %-*s %-*s %-*s\n", PA_WIDTH, + "pa_start", PA_WIDTH, "pa_end", CNA_WIDTH, "dcna", CNA_WIDTH, "scna", + FULL_EID_WIDTH, "deid", FULL_EID_WIDTH, "seid", NID_WIDTH, "nid"); + else + seq_printf(m, + "%-*llx - %-*llx : %#-*x %#-*x " + EID_ALIGNED_FMT64 " " EID_ALIGNED_FMT64 " %-*d\n", + PA_WIDTH, pr->start, PA_WIDTH, pr->end, CNA_WIDTH, pr->dcna, CNA_WIDTH, + pr->scna, HALF_EID_WIDTH, EID_ARGS64_H(pr->deid), HALF_EID_WIDTH, + EID_ARGS64_L(pr->deid), HALF_EID_WIDTH, EID_ARGS64_H(pr->seid), + HALF_EID_WIDTH, EID_ARGS64_L(pr->seid), NID_WIDTH, pr->numa_id); + return 0; +} + +static const struct seq_operations preimp_info_sops = { + .start = preimp_info_seq_start, + .stop = preimp_info_seq_stop, + .next = preimp_info_seq_next, + .show = preimp_info_seq_show, +}; + +static int init_preimport_info_seqfile(void) +{ + struct proc_dir_entry *p; + + p = proc_mkdir("obmm", NULL); + if (!p) { + pr_err("failed to init obmm proc dir.\n"); + return -ENOMEM; + } + p = proc_create_seq("obmm/preimport_info", 0, NULL, &preimp_info_sops); + if (!p) { + pr_err("failed to init obmm proc file.\n"); + + remove_proc_subtree("obmm", NULL); + return -ENOMEM; + } + return 0; +} + +int module_preimport_init(void) +{ + int ret; + + ret = init_preimport_info_seqfile(); + if (ret) + return ret; + + preimport_init_prefilled(); + + return 0; +} + +void module_preimport_exit(void) +{ + preimport_exit_prefilled(); + + 
WARN_ON(remove_proc_subtree("obmm", NULL)); +} diff --git a/drivers/ub/obmm/obmm_preimport.h b/drivers/ub/obmm/obmm_preimport.h new file mode 100644 index 000000000000..2b61fa665cc7 --- /dev/null +++ b/drivers/ub/obmm/obmm_preimport.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ +#ifndef OBMM_PREIMPORT_H +#define OBMM_PREIMPORT_H + +#include +#include +#include "obmm_core.h" + +struct ub_mem_info; + +struct preimport_range { + int numa_id; + + phys_addr_t start; + phys_addr_t end; + + unsigned int scna; + unsigned int dcna; + u8 seid[16]; + u8 deid[16]; + unsigned int use_count; + + struct list_head node; +}; + +extern void *not_ready_ptr; + +int check_preimport_cmd_common(const struct obmm_cmd_preimport *cmd_preimport); +int preimport_prepare_common(struct preimport_range *preimport_range, uint8_t base_dist); +int preimport_release_common(struct preimport_range *preimport_range, bool force); +int check_preimport_datapath_common(const struct preimport_range *preimport_range, + const struct obmm_datapath *datapath); + +int preimport_prepare_prefilled(struct obmm_cmd_preimport *cmd_preimport); +int preimport_release_prefilled(phys_addr_t start, phys_addr_t end); +void preimport_init_prefilled(void); +void preimport_exit_prefilled(void); + +/* the functions below are exposed to other components of OBMM */ +bool is_numa_base_dist_valid(uint8_t base_dist); +int obmm_set_numa_distance(unsigned int cna, int nid_remote, uint8_t base_dist); + +int obmm_preimport(struct obmm_cmd_preimport *cmd_preimport); +int obmm_unpreimport(struct obmm_cmd_preimport *cmd_preimport); +int module_preimport_init(void); +void module_preimport_exit(void); + +int preimport_commit_prefilled(phys_addr_t start, phys_addr_t end, + const struct obmm_datapath *datapath, int *p_numa_id, + void **p_handle); +int preimport_uncommit_prefilled(void *handle, phys_addr_t start, phys_addr_t end); + +#endif diff --git a/drivers/ub/obmm/obmm_preimport_prefilled.c b/drivers/ub/obmm/obmm_preimport_prefilled.c new file mode 100644 index 000000000000..ce27ed23851d --- /dev/null +++ b/drivers/ub/obmm/obmm_preimport_prefilled.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
+ */ + +#include +#include +#include + +#include "obmm_preimport.h" +#include "obmm_addr_check.h" + +struct prefilled_preimport_range { + struct preimport_range pr; + spinlock_t bitmap_lock; + unsigned long nbits; + unsigned long *bitmap; +}; +static DEFINE_MUTEX(preimport_mutex); + +static int create_prefilled_preimport_range(const struct obmm_cmd_preimport *cmd, + struct prefilled_preimport_range **p_ppr) +{ + struct prefilled_preimport_range *ppr; + + ppr = kzalloc(sizeof(struct prefilled_preimport_range), GFP_KERNEL); + if (ppr == NULL) + return -ENOMEM; + + ppr->pr.numa_id = cmd->numa_id; + ppr->pr.start = cmd->pa; + ppr->pr.end = cmd->pa + cmd->length - 1; + ppr->pr.scna = cmd->scna; + ppr->pr.dcna = cmd->dcna; + memcpy(ppr->pr.deid, cmd->deid, sizeof(cmd->deid)); + memcpy(ppr->pr.seid, cmd->seid, sizeof(cmd->seid)); + ppr->pr.use_count = 0; + + spin_lock_init(&ppr->bitmap_lock); + ppr->nbits = cmd->length / memory_block_size_bytes(); + ppr->bitmap = bitmap_zalloc(ppr->nbits, GFP_KERNEL); + if (!ppr->bitmap) { + pr_err("failed to allocate preimport range bitmap.\n"); + kfree(ppr); + return -ENOMEM; + } + + *p_ppr = ppr; + return 0; +} + +static void destroy_prefilled_preimport_range(const struct prefilled_preimport_range *ppr) +{ + kfree(ppr->bitmap); + kfree(ppr); +} + +static int get_pa_mapping(phys_addr_t addr, struct prefilled_preimport_range **p_ppr) +{ + int ret; + struct obmm_addr_info info; + + ret = query_pa_range(addr, &info); + if (ret) { + pr_err("No information found with PA=%pa.\n", &addr); + return ret; + } + if (info.user != OBMM_ADDR_USER_PREIMPORT) { + pr_err("PA=%pa is not a preimport address.\n", &addr); + return -EINVAL; + } + if (info.data == not_ready_ptr) { + pr_err("Preimport process not finished. Try later.\n"); + return -EAGAIN; + } + *p_ppr = (struct prefilled_preimport_range *)info.data; + + pr_debug("prefilled preimport range found with PA %pa.\n", &addr); + return 0; +} + +static int check_preimport_cmd(const struct obmm_cmd_preimport *cmd) +{ + int ret; + + ret = check_preimport_cmd_common(cmd); + if (ret) + return ret; + + if (cmd->pa == 0) { + pr_err("invalid preimport PA base addr 0.\n"); + return -EINVAL; + } + return 0; +} + +int preimport_prepare_prefilled(struct obmm_cmd_preimport *cmd) +{ + int ret; + struct prefilled_preimport_range *ppr; + struct obmm_pa_range pa_range; + + ret = check_preimport_cmd(cmd); + if (ret) + return ret; + + pa_range.start = cmd->pa; + pa_range.end = cmd->pa + cmd->length - 1; + pa_range.info.user = OBMM_ADDR_USER_PREIMPORT; + pa_range.info.data = not_ready_ptr; + ret = occupy_pa_range(&pa_range); + if (ret) + return ret; + + ret = create_prefilled_preimport_range(cmd, &ppr); + if (ret) + goto err_free_pa_range; + + ret = preimport_prepare_common(&ppr->pr, cmd->base_dist); + if (ret) + goto err_destroy_ppr; + cmd->numa_id = ppr->pr.numa_id; + + /* make ppr accessible to others, no more access! (ppr might be freed by racers.) 
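+ * Once update_pa_range() publishes the pointer below, a racing unpreimport
+ * can look it up and free it, so ppr must not be touched again on the
+ * success path.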
*/ + pa_range.info.data = (void *)ppr; + ret = update_pa_range(pa_range.start, &pa_range.info); + if (ret) { + cmd->numa_id = NUMA_NO_NODE; + goto err_unprepare_common; + } + + return 0; + +err_unprepare_common: + WARN_ON(preimport_release_common(&ppr->pr, true)); +err_destroy_ppr: + destroy_prefilled_preimport_range(ppr); +err_free_pa_range: + WARN_ON(free_pa_range(&pa_range)); + return ret; +} + +int preimport_release_prefilled(phys_addr_t start, phys_addr_t end) +{ + int ret; + struct obmm_pa_range pa_range; + struct prefilled_preimport_range *ppr; + + mutex_lock(&preimport_mutex); + ret = get_pa_mapping(start, &ppr); + if (ret) { + pr_err("failed to identify preimport range during unpreimport.\n"); + goto err_unlock; + } + /* must be an exact match */ + if (ppr->pr.start != start || ppr->pr.end != end) { + pr_err("requested range touches ppr<%pa> but is not an exact match.\n", + &ppr->pr.start); + ret = -EINVAL; + goto err_unlock; + } + if (ppr->pr.use_count != 0) { + pr_err("ppr<%pa> cannot be released: %u active users found.\n", &ppr->pr.start, + ppr->pr.use_count); + ret = -EBUSY; + goto err_unlock; + } + ret = preimport_release_common(&ppr->pr, false); + if (ret) { + pr_err("failed to release ppr<%pa>.\n", &ppr->pr.start); + goto err_unlock; + } + /* roll back is not possible from this point */ + + pa_range.start = ppr->pr.start; + pa_range.end = ppr->pr.end; + pa_range.info.user = OBMM_ADDR_USER_PREIMPORT; + pa_range.info.data = (void *)ppr; + WARN_ON(free_pa_range(&pa_range)); + + mutex_unlock(&preimport_mutex); + + destroy_prefilled_preimport_range(ppr); + pr_debug("ppr<%pa> released.\n", &start); + return ret; + +err_unlock: + mutex_unlock(&preimport_mutex); + return ret; +} + +static int get_ppr(phys_addr_t pa, struct prefilled_preimport_range **p_ppr) +{ + int ret; + struct prefilled_preimport_range *ppr; + + mutex_lock(&preimport_mutex); + ret = get_pa_mapping(pa, &ppr); + if (ret) + goto out_unlock; + if (ppr == not_ready_ptr) { + pr_err("ppr <%pa> not ready yet.\n", &pa); + ret = -EAGAIN; + goto out_unlock; + } + pr_debug("ppr <%pa> refcount: %u -> %u.\n", &pa, ppr->pr.use_count, ppr->pr.use_count + 1); + ppr->pr.use_count += 1; + *p_ppr = ppr; +out_unlock: + mutex_unlock(&preimport_mutex); + return ret; +} + +static void put_ppr(struct prefilled_preimport_range *ppr) +{ + mutex_lock(&preimport_mutex); + WARN_ON(ppr->pr.use_count == 0); + pr_debug("ppr <%pa> refcount: %u -> %u.\n", &ppr->pr.start, ppr->pr.use_count, + ppr->pr.use_count - 1); + ppr->pr.use_count -= 1; + mutex_unlock(&preimport_mutex); +} + +static int occupy_ppr_blocks(struct prefilled_preimport_range *ppr, phys_addr_t start, + phys_addr_t end) +{ + int ret = 0; + unsigned long bit, init_bit, end_bit, flags; + + spin_lock_irqsave(&ppr->bitmap_lock, flags); + if (start < ppr->pr.start || end > ppr->pr.end) { + pr_err("requested range [%pa, %pa] is not managed by ppr [%pa, %pa].\n", &start, + &end, &ppr->pr.start, &ppr->pr.end); + ret = -EINVAL; + goto out_unlock; + } + init_bit = (start - ppr->pr.start) / memory_block_size_bytes(); + end_bit = (end - ppr->pr.start) / memory_block_size_bytes(); + + for (bit = init_bit; bit <= end_bit; bit++) { + if (test_bit(bit, ppr->bitmap)) { + ret = -EEXIST; + pr_err("conflicts on preimport block %lu of ppr<%pa>.\n", bit, + &ppr->pr.start); + goto out_unlock; + } + } + + for (bit = init_bit; bit <= end_bit; bit++) + set_bit(bit, ppr->bitmap); + pr_debug("ppr<%pa>: bitmap[%lu, %lu] set.\n", &ppr->pr.start, init_bit, end_bit); + +out_unlock: + 
spin_unlock_irqrestore(&ppr->bitmap_lock, flags); + return ret; +} + +static int free_ppr_blocks(struct prefilled_preimport_range *ppr, phys_addr_t start, + phys_addr_t end) +{ + int ret = 0; + unsigned long bit, init_bit, end_bit, flags; + + spin_lock_irqsave(&ppr->bitmap_lock, flags); + if (start < ppr->pr.start || end > ppr->pr.end) { + pr_err("requested range [%pa, %pa] is not managed by ppr [%pa, %pa].\n", &start, + &end, &ppr->pr.start, &ppr->pr.end); + ret = -EINVAL; + goto out_unlock; + } + init_bit = (start - ppr->pr.start) / memory_block_size_bytes(); + end_bit = (end - ppr->pr.start) / memory_block_size_bytes(); + + for (bit = init_bit; bit <= end_bit; bit++) { + if (!test_bit(bit, ppr->bitmap)) { + ret = -EINVAL; + pr_err("preimport block %lu of ppr<%pa> never used.\n", bit, + &ppr->pr.start); + goto out_unlock; + } + } + + for (bit = init_bit; bit <= end_bit; bit++) + clear_bit(bit, ppr->bitmap); + pr_debug("ppr<%pa>: bitmap[%lu, %lu] cleared.\n", &ppr->pr.start, init_bit, end_bit); + +out_unlock: + spin_unlock_irqrestore(&ppr->bitmap_lock, flags); + return ret; +} + +/* alignment checked by callers */ +int preimport_commit_prefilled(phys_addr_t start, phys_addr_t end, + const struct obmm_datapath *datapath, int *p_numa_id, + void **p_handle) +{ + int ret; + struct prefilled_preimport_range *ppr; + + ret = get_ppr(start, &ppr); + if (ret) + return ret; + + /* TODO: move to out */ + ret = check_preimport_datapath_common(&ppr->pr, datapath); + if (ret) + goto err_put_ppr; + + ret = occupy_ppr_blocks(ppr, start, end); + if (ret) + goto err_put_ppr; + + *p_numa_id = ppr->pr.numa_id; + *p_handle = (void *)ppr; + return 0; + +err_put_ppr: + put_ppr(ppr); + return ret; +} + +int preimport_uncommit_prefilled(void *handle, phys_addr_t start, phys_addr_t end) +{ + int ret; + struct prefilled_preimport_range *ppr; + + ppr = (struct prefilled_preimport_range *)handle; + ret = free_ppr_blocks(handle, start, end); + if (ret) + return ret; + + put_ppr(ppr); + return ret; +} + +void preimport_init_prefilled(void) +{ +} + +void preimport_exit_prefilled(void) +{ +} diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h index b369e92a860a..3dd689dabc95 100644 --- a/include/uapi/ub/obmm.h +++ b/include/uapi/ub/obmm.h @@ -82,8 +82,10 @@ struct obmm_cmd_addr_query { } __attribute__((aligned(8))); #define OBMM_IMPORT_FLAG_ALLOW_MMAP 0x1UL +#define OBMM_IMPORT_FLAG_PREIMPORT 0x2UL #define OBMM_IMPORT_FLAG_NUMA_REMOTE 0x4UL #define OBMM_IMPORT_FLAG_MASK (OBMM_IMPORT_FLAG_ALLOW_MMAP | \ + OBMM_IMPORT_FLAG_PREIMPORT | \ OBMM_IMPORT_FLAG_NUMA_REMOTE) @@ -117,6 +119,8 @@ struct obmm_cmd_unimport { #define OBMM_CMD_UNIMPORT _IOW('x', 3, struct obmm_cmd_unimport) #define OBMM_CMD_ADDR_QUERY _IOWR('x', 4, struct obmm_cmd_addr_query) #define OBMM_CMD_EXPORT_PID _IOWR('x', 5, struct obmm_cmd_export_pid) +#define OBMM_CMD_DECLARE_PREIMPORT _IOWR('x', 6, struct obmm_cmd_preimport) +#define OBMM_CMD_UNDECLARE_PREIMPORT _IOW('x', 7, struct obmm_cmd_preimport) /* cache maintenance operations (not states) */ /* no cache maintenance (nops) */ @@ -133,6 +137,23 @@ struct obmm_cmd_unimport { */ #define OBMM_SHM_CACHE_INFER 0x4 +struct obmm_cmd_preimport { + __u64 pa; + __u64 length; + __u64 flags; + __u32 scna; + __u32 dcna; + __s32 numa_id; + __u16 priv_len; + __u8 base_dist; + __u8 deid[16]; + __u8 seid[16]; + const void *priv; +} __attribute__((aligned(16), packed)); + +#define OBMM_PREIMPORT_FLAG_MASK (0UL) +#define OBMM_UNPREIMPORT_FLAG_MASK (0UL) + #if defined(__cplusplus) } #endif -- Gitee From 
6578be6a6477c5dc8ed7b0cb30334f3544294730 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:41 +0800 Subject: [PATCH 18/48] obmm: Add shared memory device interface commit b100600e5c187238984b334e2fce60bedb545b24 openEuler This patch adds obmm_shmdev character devices to manage OBMM remote memory. For an OBMM memory region with memid ${id}, the corresponding device path is /dev/obmm_shmdev${id}. The obmm_shmdev devices provide ioctl interfaces for user-space libraries (libobmm) to configure memory devices, and mmap interfaces for general user-space applications to map the OBMM memory. This implementation ensures both import and export regions have corresponding memory devices, providing a standardized interface for user-space applications to interact with OBMM memory regions. Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 2 +- drivers/ub/obmm/obmm_core.c | 19 +++++ drivers/ub/obmm/obmm_core.h | 3 + drivers/ub/obmm/obmm_shm_dev.c | 144 +++++++++++++++++++++++++++++++++ drivers/ub/obmm/obmm_shm_dev.h | 16 ++++ 5 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 drivers/ub/obmm/obmm_shm_dev.c create mode 100644 drivers/ub/obmm/obmm_shm_dev.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 0b7410251bf5..082ec771a2aa 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0+ obmm-y := obmm_core.o \ - obmm_cache.o obmm_import.o \ + obmm_shm_dev.o obmm_cache.o obmm_import.o \ obmm_export_from_user.o conti_mem_allocator.o \ obmm_export.o obmm_export_from_pool.o obmm_preimport.o \ ubmempool_allocator.o obmm_export_region_ops.o \ diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index 8812a62b2a89..be0033541990 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -20,6 +20,7 @@ #include +#include "obmm_shm_dev.h" #include "obmm_cache.h" #include "obmm_export_region_ops.h" #include "ubmempool_allocator.h" @@ -321,11 +322,19 @@ int register_obmm_region(struct obmm_region *region) { int retval; + /* create device */ + retval = obmm_shm_dev_add(region); + if (retval) { + pr_err("Failed to create device %d. ret=%pe\n", region->regionid, ERR_PTR(retval)); + return retval; + } + /* insert OBMM_region */ retval = insert_obmm_region(region); if (retval < 0) { pr_err("Failed to insert obmm region %d on creation. ret=%pe\n", region->regionid, ERR_PTR(retval)); + obmm_shm_dev_del(region); return retval; } @@ -335,6 +344,7 @@ int register_obmm_region(struct obmm_region *region) void deregister_obmm_region(struct obmm_region *region) { remove_obmm_region(region); + obmm_shm_dev_del(region); } int set_obmm_region_priv(struct obmm_region *region, unsigned int priv_len, const void __user *priv) @@ -573,6 +583,12 @@ static int __init obmm_init(void) spin_lock_init(&g_obmm_ctx_info.lock); INIT_LIST_HEAD(&g_obmm_ctx_info.regions); + ret = obmm_shm_dev_init(); + if (ret) { + pr_err("failed to initialize obmm_shm_dev. 
ret=%pe\n", ERR_PTR(ret)); + goto out_misc_deregister; + } + module_addr_check_init(); ret = module_preimport_init(); @@ -586,6 +602,8 @@ static int __init obmm_init(void) out_addr_check_exit: module_addr_check_exit(); + obmm_shm_dev_exit(); +out_misc_deregister: misc_deregister(&obmm_dev_handle); out_allocator_exit: ubmempool_allocator_exit(); @@ -598,6 +616,7 @@ static void __exit obmm_exit(void) module_preimport_exit(); module_addr_check_exit(); + obmm_shm_dev_exit(); misc_deregister(&obmm_dev_handle); ubmempool_allocator_exit(); diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index 5420bebba615..2d85331d8c88 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -77,6 +77,9 @@ struct obmm_region { unsigned long flags; + struct cdev cdevice; + struct device device; + refcount_t refcnt; /* the total size of all memory segments included in meminfo */ diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c new file mode 100644 index 000000000000..333c388377a9 --- /dev/null +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#include +#include +#include +#include + +#include "obmm_cache.h" +#include "obmm_export_region_ops.h" +#include "obmm_import.h" +#include "obmm_shm_dev.h" + +static dev_t obmm_devt; + +static const char *obmm_shm_region_name = "OBMM_SHMDEV"; +static const char *obmm_shm_rootdev_name = "obmm"; +static struct device *obmm_shm_rootdev; + +static int obmm_shm_fops_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int obmm_shm_fops_flush(struct file *file __always_unused, fl_owner_t owner __always_unused) +{ + return 0; +} + +static int obmm_shm_fops_release(struct inode *inode __always_unused, struct file *file) +{ + return 0; +} + +static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) +{ + return -ENOTTY; +} + +static long obmm_shm_fops_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return -ENOTTY; +} + +const struct file_operations obmm_shm_fops = { .owner = THIS_MODULE, + .unlocked_ioctl = obmm_shm_fops_ioctl, + .mmap = obmm_shm_fops_mmap, + .open = obmm_shm_fops_open, + .flush = obmm_shm_fops_flush, + .release = obmm_shm_fops_release }; + +static void obmm_shm_dev_release(struct device *dev) +{ + struct obmm_region *reg; + + reg = container_of(dev, struct obmm_region, device); + module_put(THIS_MODULE); +} + +int obmm_shm_dev_add(struct obmm_region *reg) +{ + int ret; + dev_t devt; + + if (!try_module_get(THIS_MODULE)) { + pr_err("Module is dying. Reject all memory requests\n"); + return -EPERM; + } + + devt = MKDEV(MAJOR(obmm_devt), reg->regionid); + cdev_init(®->cdevice, &obmm_shm_fops); + reg->cdevice.owner = THIS_MODULE; + reg->device.devt = devt; + reg->device.release = obmm_shm_dev_release; + reg->device.parent = obmm_shm_rootdev; + device_initialize(®->device); + + ret = dev_set_name(®->device, "obmm_shmdev%d", reg->regionid); + if (ret) { + pr_err("Failed to set name for shmdev %d. ret=%pe\n", reg->regionid, ERR_PTR(ret)); + goto err_put_dev; + } + + ret = cdev_device_add(®->cdevice, ®->device); + if (ret) { + pr_err("Failed to add shm device %d. 
ret=%pe\n", reg->regionid, ERR_PTR(ret)); + goto err_put_dev; + } + + return 0; + + /* NOTE: If the device is properly initialized, the refcount of module + * should be maintained by device kobject (and the associated + * obmm_shm_dev_release function). The refcount of region is always + * recovered by kobject-triggered release function. + */ +err_put_dev: + put_device(®->device); + return ret; +} + +void obmm_shm_dev_del(struct obmm_region *reg) +{ + cdev_device_del(®->cdevice, ®->device); + put_device(®->device); +} + +int obmm_shm_dev_init(void) +{ + int ret; + + pr_info("shmdev: root device initialization started\n"); + ret = alloc_chrdev_region(&obmm_devt, OBMM_MIN_VALID_REGIONID, OBMM_REGIONID_MAX_COUNT, + obmm_shm_region_name); + if (ret) { + pr_err("Failed to allocate char device ID. ret=%pe\n", ERR_PTR(ret)); + goto err_reg_alloc; + } + + obmm_shm_rootdev = root_device_register(obmm_shm_rootdev_name); + if (IS_ERR_OR_NULL(obmm_shm_rootdev)) { + pr_err("error register obmm root device\n"); + ret = -ENOMEM; + goto err_rootdev; + } + + pr_info("shmdev: root device initialization completed\n"); + return 0; +err_rootdev: + unregister_chrdev_region(obmm_devt, OBMM_REGIONID_MAX_COUNT); +err_reg_alloc: + return ret; +} + +void obmm_shm_dev_exit(void) +{ + pr_info("shmdev: root device starts shutting down\n"); + root_device_unregister(obmm_shm_rootdev); + unregister_chrdev_region(obmm_devt, OBMM_REGIONID_MAX_COUNT); + pr_info("shmdev: root device shut down completed\n"); +} diff --git a/drivers/ub/obmm/obmm_shm_dev.h b/drivers/ub/obmm/obmm_shm_dev.h new file mode 100644 index 000000000000..bfced747a2aa --- /dev/null +++ b/drivers/ub/obmm/obmm_shm_dev.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ + +#ifndef OBMM_SHM_DEV_H +#define OBMM_SHM_DEV_H + +#include "obmm_core.h" + +int obmm_shm_dev_init(void); +void obmm_shm_dev_exit(void); +int obmm_shm_dev_add(struct obmm_region *reg); +void obmm_shm_dev_del(struct obmm_region *reg); + +#endif -- Gitee From 3bc9efa063a79b8ff3db22d616aae7fbdd0cef7f Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:42 +0800 Subject: [PATCH 19/48] obmm: Add mmap support for shared memory regions commit 49ddfaab9aa31a41039a8833c746a18553ae0968 openEuler Implement mmap operations to allow direct memory mapping of OBMM regions into user space, providing zero-copy access. Key features include: 1. Implemented mmap operation allowing users to directly read and write OBMM memory 2. Added cache policy control based on O_SYNC flag - When opening the device with O_SYNC flag, memory is mapped as non-cacheable - Without O_SYNC flag, memory is mapped as cacheable (normal) 3. Added support for PMD huge page mode mapping - Introduced OBMM_MMAP_FLAG_HUGETLB_PMD flag - Allows users to request huge page mappings for better performance This enables efficient zero-copy access to shared memory regions for user-space applications. 
Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/obmm_core.c | 2 + drivers/ub/obmm/obmm_core.h | 19 ++ drivers/ub/obmm/obmm_shm_dev.c | 323 ++++++++++++++++++++++++++++++++- include/uapi/ub/obmm.h | 15 ++ 4 files changed, 358 insertions(+), 1 deletion(-) diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index be0033541990..2cf961cc5dc6 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -298,6 +298,7 @@ static void remove_obmm_region(struct obmm_region *reg) void uninit_obmm_region(struct obmm_region *region) { ida_free(&g_obmm_region_ida, region->regionid); + mutex_destroy(®ion->state_mutex); } int init_obmm_region(struct obmm_region *region) @@ -305,6 +306,7 @@ int init_obmm_region(struct obmm_region *region) int retval; refcount_set(®ion->refcnt, 0); + mutex_init(®ion->state_mutex); INIT_LIST_HEAD(®ion->node); retval = ida_alloc_range(&g_obmm_region_ida, OBMM_MIN_VALID_REGIONID, diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index 2d85331d8c88..f193de575ecb 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -46,6 +46,12 @@ enum obmm_region_type { OBMM_IMPORT_REGION }; +enum obmm_mmap_mode { + OBMM_MMAP_INIT, + OBMM_MMAP_NORMAL, + OBMM_MMAP_OSYNC +}; + enum obmm_mmap_granu { OBMM_MMAP_GRANU_NONE, OBMM_MMAP_GRANU_PAGE, @@ -84,6 +90,13 @@ struct obmm_region { /* the total size of all memory segments included in meminfo */ u64 mem_size; + /* + * current mapping mode. + * init: mmap_mode = OBMM_MMAP_INIT + * cc-mmap: mmap_mode = OBMM_MMAP_NORMAL + * nc-mmap: mmap_mode = OBMM_MMAP_OSYNC + */ + enum obmm_mmap_mode mmap_mode; /* * the granularity of memory mapping, initially OBMM_MMAP_GRANU_NONE. * When users call mmap, the mmap granularity is determined based on @@ -96,6 +109,12 @@ struct obmm_region { * OBMM_MEM_ALLOW_NONCACHEABLE_MMAP: Supports non-cacheable mapping */ unsigned long mem_cap; + /* number of mmap */ + unsigned long mmap_count; + + /* protect ownership_info and serialize concurrent page table change requests */ + struct mutex state_mutex; + /* regions are chained into a list for management */ struct list_head node; diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c index 333c388377a9..6e135b0eb1a8 100644 --- a/drivers/ub/obmm/obmm_shm_dev.c +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -19,9 +19,133 @@ static dev_t obmm_devt; static const char *obmm_shm_region_name = "OBMM_SHMDEV"; static const char *obmm_shm_rootdev_name = "obmm"; static struct device *obmm_shm_rootdev; +/** + * Convert VM flags to mem state + */ +static unsigned long get_vma_mem_state(const vm_flags_t vm_flags, bool cacheable) +{ + unsigned long mem_state; + + if (vm_flags & VM_WRITE) + mem_state = OBMM_SHM_MEM_READWRITE; + else if ((vm_flags & VM_READ) && (vm_flags & VM_EXEC)) + mem_state = OBMM_SHM_MEM_READEXEC; + else if (vm_flags & VM_READ) + mem_state = OBMM_SHM_MEM_READONLY; + else + mem_state = OBMM_SHM_MEM_NO_ACCESS; + + if (cacheable && mem_state != OBMM_SHM_MEM_NO_ACCESS) + mem_state |= OBMM_SHM_MEM_NORMAL; + else + mem_state |= OBMM_SHM_MEM_NORMAL_NC; + pr_debug("VMA init mem_state: vma_flags=0x%lx, cacheable=%d, mem_state=0x%lx\n", + vm_flags, cacheable, mem_state); + return mem_state; +} + +/* VMA operations for obmm-mmaped VMA */ +static void obmm_vma_open(struct vm_area_struct *vma) +{ + pr_debug("VMA opened range (0x%lx-0x%lx)\n", vma->vm_start, vma->vm_end); +} + +static void obmm_vma_close(struct vm_area_struct *vma) +{ + struct 
obmm_region *reg; + + reg = (struct obmm_region *)vma->vm_file->private_data; + + mutex_lock(&reg->state_mutex); + reg->mmap_count--; + if (reg->mmap_count == 0) { + /* reset mmap_mode */ + reg->mmap_mode = OBMM_MMAP_INIT; + } + mutex_unlock(&reg->state_mutex); + pr_debug("obmm_shmdev munmap: mem_id=%d pid=%d vma=[%#lx, %#lx]\n", reg->regionid, + current->pid, vma->vm_start, vma->vm_end); +} + +static int obmm_vma_may_split(struct vm_area_struct *vma, unsigned long addr) +{ + /* not supported */ + pr_err("VMA may split at 0x%lx (range: 0x%lx-0x%lx), but split not supported\n", addr, + vma->vm_start, vma->vm_end); + return -EOPNOTSUPP; +} + +static int obmm_vma_mremap(struct vm_area_struct *vma __always_unused) +{ + pr_warn("mremap not supported\n"); + return -EOPNOTSUPP; +} + +static int obmm_vma_mprotect(struct vm_area_struct *vma __always_unused, + unsigned long start __always_unused, unsigned long end __always_unused, + unsigned long newflags __always_unused) +{ + pr_warn("mprotect not supported\n"); + return -EOPNOTSUPP; +} +static vm_fault_t obmm_vma_fault(struct vm_fault *vmf __always_unused) +{ + pr_warn("Unexpected fault\n"); + return VM_FAULT_SIGBUS; +} +static int obmm_vma_access(struct vm_area_struct *vma __always_unused, + unsigned long addr __always_unused, void *buf __always_unused, + int len __always_unused, int write __always_unused) +{ + pr_warn("access not supported\n"); + return -EOPNOTSUPP; +} +static const char *obmm_vma_name(struct vm_area_struct *vma __always_unused) +{ + return "OBMM_SHM"; +} + +static const struct vm_operations_struct obmm_vm_ops = { + .open = obmm_vma_open, + .close = obmm_vma_close, + .may_split = obmm_vma_may_split, + .mremap = obmm_vma_mremap, + .mprotect = obmm_vma_mprotect, + .fault = obmm_vma_fault, + .access = obmm_vma_access, + .name = obmm_vma_name, +}; static int obmm_shm_fops_open(struct inode *inode, struct file *file) { + struct obmm_region *reg; + bool cacheable; + + reg = container_of(inode->i_cdev, struct obmm_region, cdevice); + file->private_data = reg; + + pr_debug("obmm_shmdev open: mem_id=%d pid=%d f_mode=%#x f_flags=%#x\n", reg->regionid, + current->pid, file->f_mode, file->f_flags); + + cacheable = !(file->f_flags & O_SYNC); + if (cacheable && !(reg->mem_cap & OBMM_MEM_ALLOW_CACHEABLE_MMAP)) { + pr_err("Noncacheable region %d cannot be mmapped in cacheable mode.\n", + reg->regionid); + return -EPERM; + } + if (!cacheable && !(reg->mem_cap & OBMM_MEM_ALLOW_NONCACHEABLE_MMAP)) { + pr_err("Cacheable region %d cannot be mmapped in noncacheable mode.\n", + reg->regionid); + return -EPERM; + } + if (try_get_obmm_region(reg) == NULL) { + pr_err("obmm_shmdev open: The device is in creation or destruction process. 
Open failed.\n"); + return -EAGAIN; + } + + pr_debug("obmm_shmdev open: mem_id=%d pid=%d completed.\n", reg->regionid, current->pid); + return 0; } @@ -32,12 +156,206 @@ static int obmm_shm_fops_flush(struct file *file __always_unused, fl_owner_t own static int obmm_shm_fops_release(struct inode *inode __always_unused, struct file *file) { + struct obmm_region *reg = (struct obmm_region *)file->private_data; + + pr_debug("obmm_shmdev release: mem_id=%d pid=%d\n", reg->regionid, current->pid); + put_obmm_region(reg); + return 0; } +static int map_obmm_region(struct vm_area_struct *vma, struct obmm_region *reg, + enum obmm_mmap_granu mmap_granu) +{ + struct obmm_export_region *e_reg; + struct obmm_import_region *i_reg; + + pr_debug("mmap region %d: size=%#llx\n", reg->regionid, reg->mem_size); + if (reg->type == OBMM_IMPORT_REGION) { + i_reg = container_of(reg, struct obmm_import_region, region); + return map_import_region(vma, i_reg, mmap_granu); + } + + e_reg = container_of(reg, struct obmm_export_region, region); + return map_export_region(vma, e_reg, mmap_granu); +} + +/* Return page table protection bits. + * @mem_state must be validated by caller. + */ +static pgprot_t mem_state_to_pgprot(unsigned long mem_state) +{ + pgprot_t pgprot; + + /* initialize pgprot to be normal memory pgprot with certain access rights */ + if ((mem_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READONLY) + pgprot = PAGE_READONLY; + else if ((mem_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READEXEC) + pgprot = PAGE_READONLY_EXEC; + else if ((mem_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READWRITE) + pgprot.pgprot = _PAGE_READONLY & ~PTE_RDONLY; + else + pgprot = PAGE_NONE; + + /* modify cacheability attribute if necessary */ + if ((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL_NC) + pgprot = pgprot_writecombine(pgprot); + else if ((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_DEVICE) + pgprot = pgprot_noncached(pgprot); + + return pgprot; +} + +static void print_mmap_param(const struct file *file, const struct vm_area_struct *vma) +{ + const struct obmm_region *reg = (struct obmm_region *)file->private_data; + const char *vm_flags_desc, *f_flags_desc; + + pr_debug("obmm_shmdev mmap: mem_id=%d pid=%d vma=[%#lx, %#lx] pgoff=%#lx ", reg->regionid, + current->pid, vma->vm_start, vma->vm_end, vma->vm_pgoff); + + if (vma->vm_flags & VM_WRITE) + vm_flags_desc = "W"; + else if ((vma->vm_flags & VM_READ) && (vma->vm_flags & VM_EXEC)) + vm_flags_desc = "RX"; + else if (vma->vm_flags & VM_READ) + vm_flags_desc = "R"; + else + vm_flags_desc = "N"; + + if (file->f_flags & O_SYNC) + f_flags_desc = "O_SYNC"; + else + f_flags_desc = "not O_SYNC"; + + pr_debug("vm_flags=%#lx(%s) f_flags=%#x(%s)\n", vma->vm_flags, vm_flags_desc, file->f_flags, + f_flags_desc); +} + +static bool validate_perm(struct file *file, vm_flags_t vm_flags) +{ + if (((vm_flags & VM_READ) && !(file->f_mode & FMODE_READ)) || + ((vm_flags & VM_WRITE) && !(file->f_mode & FMODE_WRITE)) || + ((vm_flags & VM_EXEC) && !(file->f_mode & FMODE_READ))) { + pr_err("%s false: vm_flags: %#lx, f_mode: %#x\n", __func__, vm_flags, file->f_mode); + return false; + } + return true; +} + static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) { - return -ENOTTY; + struct obmm_region *reg = (struct obmm_region *)file->private_data; + unsigned long size, offset; + uint8_t mem_state; + enum obmm_mmap_mode old_mmap_mode; + enum obmm_mmap_granu mmap_granu; + int ret; + bool cacheable, o_sync; + + print_mmap_param(file, 
vma); + if (!region_allow_mmap(reg)) { + pr_err("mmap region %d: not allowed to be mmapped\n", reg->regionid); + return -EPERM; + } + + if (!validate_perm(file, vma->vm_flags)) { + pr_err("mmap region %d: invalid vma permission\n", reg->regionid); + return -EPERM; + } + + o_sync = file->f_flags & O_SYNC; + size = vma->vm_end - vma->vm_start; + offset = vma->vm_pgoff << PAGE_SHIFT; + + if (offset & OBMM_MMAP_FLAG_HUGETLB_PMD) { + pr_debug("trying hugepage mmap\n"); + mmap_granu = OBMM_MMAP_GRANU_PMD; + offset &= ~OBMM_MMAP_FLAG_HUGETLB_PMD; + } else { + mmap_granu = OBMM_MMAP_GRANU_PAGE; + } + if (reg->mmap_granu == OBMM_MMAP_GRANU_NONE) { + reg->mmap_granu = mmap_granu; + } else if (reg->mmap_granu != mmap_granu) { + pr_err("PAGE_SIZE and PMD_SIZE mmap granularities must not be mixed on the same region\n"); + return -EINVAL; + } + + vma->vm_pgoff = offset >> PAGE_SHIFT; + + if (offset >= reg->mem_size || size > reg->mem_size - offset) { + pr_err("mmap region %d: offset:%#lx, size:%#lx over region size: %#llx\n", + reg->regionid, offset, size, reg->mem_size); + return -EINVAL; + } + + /* + * VM flags considerations + * Compared to legacy device memory, OBMM memory has many different properties: + * 1. does not have side-effects on access (VM_IO not set) + * 2. may be used for core dump output (VM_DONTDUMP not set) + * On the other hand, OBMM and traditional device memory do have some similarities: + * 3. the mapping cannot be inherited on process fork (VM_DONTCOPY set) for now + * 4. VMA merging and expanding makes no sense (VM_DONTEXPAND set) + * 5. the VMA should not be swapped out (VM_LOCKED set) + * 6. a mappable import region does not have struct page; a mappable export region has struct + * page, but it cannot work as expected since its kernel linear mapping might be modified + * (VM_PFNMAP set) + */ + vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_LOCKED | VM_PFNMAP); + cacheable = !o_sync; + mem_state = get_vma_mem_state(vma->vm_flags, cacheable); + + /* initial VMA page prot used by the mapping process -- will be changed later */ + vma->vm_page_prot = mem_state_to_pgprot(mem_state); + + mutex_lock(&reg->state_mutex); + old_mmap_mode = reg->mmap_mode; + + if ((o_sync && reg->mmap_mode == OBMM_MMAP_NORMAL) || + (!o_sync && reg->mmap_mode == OBMM_MMAP_OSYNC)) { + pr_err("region cannot be mapped cacheable and non-cacheable at the same time\n"); + ret = -EPERM; + goto err_mutex_unlock; + } + if (reg->mmap_mode == OBMM_MMAP_INIT) + reg->mmap_mode = o_sync ? OBMM_MMAP_OSYNC : OBMM_MMAP_NORMAL; + + ret = map_obmm_region(vma, reg, mmap_granu); + if (ret) { + pr_err("Failed to mmap region %d. ret=%pe\n", reg->regionid, ERR_PTR(ret)); + goto reset_cur_osync; + } + + reg->mmap_count++; + mutex_unlock(&reg->state_mutex); + /* + * Since OBMM allows changing protection per page and we will not split + * VMAs in the near future, a mismatch between PTE protection and + * VMA flags is inevitable. Our current approach is to avoid faults + * entirely and change the PTE protection on the fly instead. Here we + * just set the page protection to the most restrictive one to guard + * against unexpected access. 
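+ * The PTEs that map_obmm_region() has already written keep the
+ * permissions the caller asked for; only the VMA-level default
+ * becomes restrictive.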
+ */ + vma->vm_page_prot = vm_get_page_prot(VM_NONE); + if (!cacheable) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vm_flags_clear(vma, VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE); + + vma->vm_ops = &obmm_vm_ops; + + pr_debug("obmm_shmdev mmap: mem_id=%d pid=%d vma=[%#lx, %#lx] mapped: mem_state=%#x.\n", + reg->regionid, current->pid, vma->vm_start, vma->vm_end, mem_state); + + return 0; + +reset_cur_osync: + if (old_mmap_mode == OBMM_MMAP_INIT) + reg->mmap_mode = OBMM_MMAP_INIT; +err_mutex_unlock: + mutex_unlock(®->state_mutex); + return ret; } static long obmm_shm_fops_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -70,6 +388,9 @@ int obmm_shm_dev_add(struct obmm_region *reg) return -EPERM; } + reg->mmap_count = 0; + reg->mmap_mode = OBMM_MMAP_INIT; + devt = MKDEV(MAJOR(obmm_devt), reg->regionid); cdev_init(®->cdevice, &obmm_shm_fops); reg->cdevice.owner = THIS_MODULE; diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h index 3dd689dabc95..0988d3d9c56b 100644 --- a/include/uapi/ub/obmm.h +++ b/include/uapi/ub/obmm.h @@ -122,6 +122,19 @@ struct obmm_cmd_unimport { #define OBMM_CMD_DECLARE_PREIMPORT _IOWR('x', 6, struct obmm_cmd_preimport) #define OBMM_CMD_UNDECLARE_PREIMPORT _IOW('x', 7, struct obmm_cmd_preimport) +/* 2bits */ +#define OBMM_SHM_MEM_CACHE_RESV 0x0 +#define OBMM_SHM_MEM_NORMAL 0x1 +#define OBMM_SHM_MEM_NORMAL_NC 0x2 +#define OBMM_SHM_MEM_DEVICE 0x3 +#define OBMM_SHM_MEM_CACHE_MASK 0b11 +/* 2bits */ +#define OBMM_SHM_MEM_READONLY 0x0 +#define OBMM_SHM_MEM_READEXEC 0x4 +#define OBMM_SHM_MEM_READWRITE 0x8 +#define OBMM_SHM_MEM_NO_ACCESS 0xc +#define OBMM_SHM_MEM_ACCESS_MASK 0b1100 + /* cache maintenance operations (not states) */ /* no cache maintenance (nops) */ #define OBMM_SHM_CACHE_NONE 0x0 @@ -154,6 +167,8 @@ struct obmm_cmd_preimport { #define OBMM_PREIMPORT_FLAG_MASK (0UL) #define OBMM_UNPREIMPORT_FLAG_MASK (0UL) +#define OBMM_MMAP_FLAG_HUGETLB_PMD (1UL << 63) + #if defined(__cplusplus) } #endif -- Gitee From d3183f2a84d9de4c941fa244438925010edfde56 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:42 +0800 Subject: [PATCH 20/48] obmm: Add ownership management and transfer mechanisms commit a6100c198c3be3b38237e001ea8caf016fa48429 openEuler When applications use cacheable memory through a shared model, consistency models need to be considered. For each OBMM basic granularity memory segment, users may have one of three permissions: none (PROT_NONE), read (PROT_READ), or read-write (PROT_WRITE). * When an application maps, permissions can be configured through the prot parameter of mmap * After application mapping, current permissions can be switched through obmm_set_ownership * When an application uses munmap or exits, permissions switch to none At any given time, all hosts accessing the memory (including providers and users) can only be in one of the following two states, otherwise there is a risk of data inconsistency: 1. All memory access processes have read permission or none permission (no visitor has write permission) 2. Only one host has a process with write permission, and all mapped processes on other hosts have none permission This commit implements a user-space interface that allows users to adjust the ownership of their mapped OBMM memory to correctly maintain memory consistency in a supernode. 
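For reference, a small self-contained sketch of the per-page bookkeeping this series uses (it mirrors the packed counter layout and the GET_R_COUNTER/GET_W_COUNTER helpers added in drivers/ub/obmm/obmm_ownership.h; the sample values in main() are invented for illustration):

    #include <stdint.h>
    #include <stdio.h>

    /* one u32 per page: readers in bits 16-31, writers in bits 0-15 */
    #define WRITE_SHIFT 0
    #define READ_SHIFT  16
    #define COUNT_MASK  0xffffu

    static uint32_t readers(uint32_t state) { return (state >> READ_SHIFT) & COUNT_MASK; }
    static uint32_t writers(uint32_t state) { return (state >> WRITE_SHIFT) & COUNT_MASK; }

    static uint32_t merge(uint32_t r, uint32_t w)
    {
        return (r << READ_SHIFT) | (w << WRITE_SHIFT);
    }

    int main(void)
    {
        uint32_t state = merge(2, 1);  /* two readers and one writer on this page */

        /* a new mapping or ownership change is refused once its counter saturates */
        printf("readers=%u writers=%u ro_ok=%d rw_ok=%d\n",
               readers(state), writers(state),
               readers(state) < COUNT_MASK, writers(state) < COUNT_MASK);
        return 0;
    }

Note that these counters only track mappings on the local host; the cross-host single-writer rule described above remains a protocol between the participating applications.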
Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 2 +- drivers/ub/obmm/obmm_core.c | 3 + drivers/ub/obmm/obmm_core.h | 1 + drivers/ub/obmm/obmm_ownership.c | 366 ++++++++++++++++++++++ drivers/ub/obmm/obmm_ownership.h | 56 ++++ drivers/ub/obmm/obmm_shm_dev.c | 513 ++++++++++++++++++++++++++++++- include/uapi/ub/obmm.h | 10 + 7 files changed, 944 insertions(+), 7 deletions(-) create mode 100644 drivers/ub/obmm/obmm_ownership.c create mode 100644 drivers/ub/obmm/obmm_ownership.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 082ec771a2aa..dbbcfe7cf5fb 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -3,7 +3,7 @@ obmm-y := obmm_core.o \ obmm_shm_dev.o obmm_cache.o obmm_import.o \ - obmm_export_from_user.o conti_mem_allocator.o \ + obmm_export_from_user.o obmm_ownership.o conti_mem_allocator.o \ obmm_export.o obmm_export_from_pool.o obmm_preimport.o \ ubmempool_allocator.o obmm_export_region_ops.o \ obmm_addr_check.o obmm_preimport_prefilled.o diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index 2cf961cc5dc6..94e25b7de179 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -25,6 +25,7 @@ #include "obmm_export_region_ops.h" #include "ubmempool_allocator.h" #include "obmm_import.h" +#include "obmm_ownership.h" #include "obmm_preimport.h" #include "obmm_addr_check.h" #include "obmm_export.h" @@ -297,6 +298,8 @@ static void remove_obmm_region(struct obmm_region *reg) void uninit_obmm_region(struct obmm_region *region) { + if (region->ownership_info) + release_ownership_info(region); ida_free(&g_obmm_region_ida, region->regionid); mutex_destroy(®ion->state_mutex); } diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index f193de575ecb..f353495f3034 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -112,6 +112,7 @@ struct obmm_region { /* number of mmap */ unsigned long mmap_count; + struct obmm_ownership_info *ownership_info; /* protect ownership_info and serialize concurrent page table change requests */ struct mutex state_mutex; diff --git a/drivers/ub/obmm/obmm_ownership.c b/drivers/ub/obmm/obmm_ownership.c new file mode 100644 index 000000000000..625446b24064 --- /dev/null +++ b/drivers/ub/obmm/obmm_ownership.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description:OBMM Framework's implementations. 
+ */ + +#include +#include + +#include "obmm_core.h" +#include "obmm_ownership.h" + +static inline uint32_t merge_counts(uint32_t read, uint32_t write) +{ + return (read << READ_SHIFT) | (write << WRITE_SHIFT); +} + +/* + * dirty -> non-dirty: INVAL_WB + * non-dirty cacheable -> NC: INVAL + * cache capability rise: NONE + * cache operation coverage: INVAL_WB > INVAL > NONE + */ +uint8_t infer_cache_ops(uint8_t cur_state, uint8_t target_state) +{ + bool cur_dirty, cur_none, target_dirty, target_none, target_clean; + uint8_t ops = OBMM_SHM_CACHE_NONE; + + cur_dirty = ((cur_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READWRITE && + (cur_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL); + target_dirty = ((target_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READWRITE && + (target_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL); + target_clean = ((target_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READONLY && + (target_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL); + cur_none = ((cur_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_NO_ACCESS || + (cur_state & OBMM_SHM_MEM_CACHE_MASK) != OBMM_SHM_MEM_NORMAL); + target_none = ((target_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_NO_ACCESS || + (target_state & OBMM_SHM_MEM_CACHE_MASK) != OBMM_SHM_MEM_NORMAL); + if (cur_dirty && target_clean) + ops = OBMM_SHM_CACHE_WB_ONLY; + else if (cur_dirty && !target_dirty) + ops = OBMM_SHM_CACHE_WB_INVAL; + else if (!cur_none && target_none) + ops = OBMM_SHM_CACHE_INVAL; + + pr_debug("%s: target_state = %u; ops = %u\n", __func__, target_state, ops); + return ops; +} + +/** + * Calculate the local page state index corresponding to the VMA address + */ +int vma_addr_to_page_idx_local(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long offset_in_vma = addr - vma->vm_start; + + return offset_in_vma >> PAGE_SHIFT; +} + +/** + * Calculate the global page state index corresponding to the VMA address + */ +static int vma_addr_to_page_idx(struct vm_area_struct *vma, + struct obmm_local_state_info *local_state_info, unsigned long addr) +{ + return local_state_info->orig_pgoff + vma_addr_to_page_idx_local(vma, addr); +} + +/* Check if new permissions conflict with existing mappings */ +static int check_target_state_allowed(uint32_t state_count, uint8_t target_mem_state) +{ + uint32_t read_count, write_count; + + read_count = GET_R_COUNTER(state_count); + write_count = GET_W_COUNTER(state_count); + + switch (target_mem_state & OBMM_SHM_MEM_ACCESS_MASK) { + case OBMM_SHM_MEM_READONLY: + fallthrough; + case OBMM_SHM_MEM_READEXEC: + if (read_count == MAX_READ_COUNT) { + pr_warn("%s: readonly map failed, read_count=%d\n", __func__, read_count); + return -EBUSY; + } + break; + case OBMM_SHM_MEM_READWRITE: + if (write_count == MAX_WRITE_COUNT) { + pr_warn("%s: readwrite map failed, write_count=%d\n", __func__, + write_count); + return -EBUSY; + } + break; + default: + break; + } + return 0; +} + +/** + * Check whether mmap operation is possible. + * The caller holds region state_mutex lock. 
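+ * Every page covered by the VMA must be able to take the requested
+ * permission without saturating its packed read/write counter.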
+ */ +int check_mmap_allowed(struct obmm_region *reg, struct vm_area_struct *vma, uint8_t mem_state) +{ + int idx_offset, page_idx_start, page_count, ret; + uint32_t state_count; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, vma->vm_start); + page_count = local_state_info->npages; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + ret = check_target_state_allowed(state_count, mem_state); + if (ret) + return ret; + } + return 0; +} + +/* + * Update the count of the corresponding permission in the state. + */ +static uint32_t update_state_count(uint32_t state_count, uint8_t target_mem_state, bool inc) +{ + uint32_t read_count, write_count; + int delta; + + delta = inc ? 1 : -1; + read_count = GET_R_COUNTER(state_count); + write_count = GET_W_COUNTER(state_count); + + /* inc new permission count */ + switch (target_mem_state & OBMM_SHM_MEM_ACCESS_MASK) { + case OBMM_SHM_MEM_NO_ACCESS: + break; + case OBMM_SHM_MEM_READONLY: + fallthrough; + case OBMM_SHM_MEM_READEXEC: + read_count += delta; + break; + case OBMM_SHM_MEM_READWRITE: + write_count += delta; + break; + default: + break; + } + return merge_counts(read_count, write_count); +} + +/** + * Check whether permissions can be modified. + * The caller holds region state_mutex lock. + */ +int check_modify_ownership_allowed(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info) +{ + int idx_offset, page_idx_start, page_count, local_page_idx_start, ret; + uint32_t state_count; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + uint8_t old_state; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, update_info->start); + local_page_idx_start = vma_addr_to_page_idx_local(vma, update_info->start); + page_count = (update_info->end - update_info->start) >> PAGE_SHIFT; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + old_state = + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset]; + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + + /* Check for conflicts after simulating permission changes */ + /* Remove old permissions */ + state_count = update_state_count(state_count, old_state, false); + ret = check_target_state_allowed(state_count, update_info->mem_state); + if (ret) + return ret; + } + + return 0; +} + +/** + * Increase global page permission count (for mmap). + * The caller holds region state_mutex lock. 
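+ * The page's current permission is subtracted before the check, so
+ * changing a mapping's own permission in place never conflicts with itself.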
+ */ +void add_mapping_permission(struct obmm_region *reg, struct vm_area_struct *vma, uint8_t mem_state) +{ + int idx_offset, page_idx_start, page_count; + uint32_t state_count; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, vma->vm_start); + page_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + state_count = update_state_count(state_count, mem_state, true); + info->mem_state_arr[page_idx_start + idx_offset] = state_count; + } +} + +/** + * Update global page permission count and VMA local permissions. + * The caller holds region state_mutex lock. + */ +void update_ownership(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info) +{ + int idx_offset, page_idx_start, page_count, local_page_idx_start; + uint32_t state_count; + uint8_t old_state; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, update_info->start); + local_page_idx_start = vma_addr_to_page_idx_local(vma, update_info->start); + page_count = (update_info->end - update_info->start) >> PAGE_SHIFT; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + old_state = + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset]; + + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + /* Remove old permissions */ + state_count = update_state_count(state_count, old_state, false); + /* Add new permissions */ + state_count = update_state_count(state_count, update_info->mem_state, true); + + /* update mem_state_arr */ + info->mem_state_arr[page_idx_start + idx_offset] = state_count; + /* update vma local_state_info */ + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset] = + update_info->mem_state; + } +} + +/** + * Remove global page permission count. + * The caller holds region state_mutex lock. 
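+ * The VMA-local state array records which permission each page held and
+ * therefore exactly what to subtract on unmap.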
+ */ +void remove_mapping_permission(struct obmm_region *reg, struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + int idx_offset, page_idx_start, page_count, local_page_idx_start; + uint32_t state_count; + uint8_t old_state; + struct obmm_local_state_info *local_state_info; + struct obmm_ownership_info *info; + + info = reg->ownership_info; + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + page_idx_start = vma_addr_to_page_idx(vma, local_state_info, start); + local_page_idx_start = vma_addr_to_page_idx_local(vma, start); + page_count = (end - start) >> PAGE_SHIFT; + + for (idx_offset = 0; idx_offset < page_count; idx_offset++) { + old_state = + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset]; + state_count = info->mem_state_arr[page_idx_start + idx_offset]; + + /* Remove permissions */ + state_count = update_state_count(state_count, old_state, false); + info->mem_state_arr[page_idx_start + idx_offset] = state_count; + } +} + +int init_local_state_info(struct vm_area_struct *vma, uint8_t mem_state) +{ + struct obmm_local_state_info *local_state_info; + unsigned long size; + int ret, i; + + size = vma->vm_end - vma->vm_start; + local_state_info = kzalloc(sizeof(struct obmm_local_state_info), GFP_KERNEL); + if (local_state_info == NULL) + return -ENOMEM; + + local_state_info->npages = size >> PAGE_SHIFT; + local_state_info->local_mem_state_arr = vmalloc(sizeof(uint8_t) * local_state_info->npages); + + if (local_state_info->local_mem_state_arr == NULL) { + ret = -ENOMEM; + goto out_local_state_info; + } + for (i = 0; i < local_state_info->npages; i++) + local_state_info->local_mem_state_arr[i] = mem_state; + + local_state_info->orig_pgoff = vma->vm_pgoff; + vma->vm_private_data = local_state_info; + + pr_debug("init vma local state: npages=%d, state=%#x\n", local_state_info->npages, + mem_state); + return 0; +out_local_state_info: + kfree(local_state_info); + return ret; +} + +void release_local_state_info(struct vm_area_struct *vma) +{ + struct obmm_local_state_info *local_state_info; + + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + vma->vm_private_data = NULL; + vfree(local_state_info->local_mem_state_arr); + kfree(local_state_info); +} + +/* + * Initialize the global page permission count array. + * The obmm_ownership_info is created when the region is mmapped for the first time, + * so the caller need to hold region state_mutex lock. 
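+ * Repeated calls are no-ops once reg->ownership_info is set.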
+ */ +int init_ownership_info(struct obmm_region *reg) +{ + struct obmm_ownership_info *info; + int i, ret; + + if (reg->ownership_info) + return 0; + info = kzalloc(sizeof(struct obmm_ownership_info), GFP_KERNEL); + if (info == NULL) + return -ENOMEM; + + info->npages = reg->mem_size >> PAGE_SHIFT; + info->mem_state_arr = vmalloc(sizeof(uint32_t) * info->npages); + if (info->mem_state_arr == NULL) { + ret = -ENOMEM; + goto out_free_info; + } + for (i = 0; i < info->npages; i++) + info->mem_state_arr[i] = 0; + + reg->ownership_info = info; + + pr_debug("init ownership: npages=%d, state=%#x\n", info->npages, 0U); + return 0; +out_free_info: + kfree(info); + return ret; +} + +void release_ownership_info(struct obmm_region *reg) +{ + struct obmm_ownership_info *info = reg->ownership_info; + + reg->ownership_info = NULL; + vfree(info->mem_state_arr); + kfree(info); +} diff --git a/drivers/ub/obmm/obmm_ownership.h b/drivers/ub/obmm/obmm_ownership.h new file mode 100644 index 000000000000..a4c49abb8117 --- /dev/null +++ b/drivers/ub/obmm/obmm_ownership.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ + +#ifndef OBMM_OWNERSHIP_H +#define OBMM_OWNERSHIP_H + +#include "obmm_core.h" + +#define WRITE_COUNT_BIT 16 +#define READ_COUNT_BIT 16 + +#define WRITE_MASK ((1 << WRITE_COUNT_BIT) - 1) /* 16-bit mask */ +#define READ_MASK ((1 << READ_COUNT_BIT) - 1) /* 16-bit mask */ + +#define MAX_WRITE_COUNT WRITE_MASK +#define MAX_READ_COUNT READ_MASK + +#define WRITE_SHIFT 0 +#define READ_SHIFT (WRITE_COUNT_BIT) + +#define GET_W_COUNTER(val) (((val) >> WRITE_SHIFT) & WRITE_MASK) +#define GET_R_COUNTER(val) (((val) >> READ_SHIFT) & READ_MASK) + +/* + * [ 16-31 : 0-15 ] + * state:[ Read : Write ] + * [ 65535 : 65535 ] + */ +struct obmm_ownership_info { + uint32_t *mem_state_arr; + int npages; +}; + +struct obmm_local_state_info { + uint8_t *local_mem_state_arr; + /* Original file offset in vma */ + unsigned long orig_pgoff; + int npages; +}; +int vma_addr_to_page_idx_local(struct vm_area_struct *vma, unsigned long addr); +uint8_t infer_cache_ops(uint8_t cur_state, uint8_t target_state); +int init_ownership_info(struct obmm_region *reg); +int init_local_state_info(struct vm_area_struct *vma, uint8_t mem_state); +void release_ownership_info(struct obmm_region *reg); +void release_local_state_info(struct vm_area_struct *vma); +void add_mapping_permission(struct obmm_region *reg, struct vm_area_struct *vma, uint8_t mem_state); +void update_ownership(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info); +int check_modify_ownership_allowed(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info); +int check_mmap_allowed(struct obmm_region *reg, struct vm_area_struct *vma, uint8_t mem_state); +void remove_mapping_permission(struct obmm_region *reg, struct vm_area_struct *vma, + unsigned long start, unsigned long end); +#endif diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c index 6e135b0eb1a8..aea000dc42ac 100644 --- a/drivers/ub/obmm/obmm_shm_dev.c +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -12,6 +12,7 @@ #include "obmm_cache.h" #include "obmm_export_region_ops.h" #include "obmm_import.h" +#include "obmm_ownership.h" #include "obmm_shm_dev.h" static dev_t obmm_devt; @@ -19,6 +20,10 @@ static dev_t obmm_devt; static const char *obmm_shm_region_name = "OBMM_SHMDEV"; static const char 
*obmm_shm_rootdev_name = "obmm";
 static struct device *obmm_shm_rootdev;
+
+static int scan_and_flush(struct obmm_region *reg, struct vm_area_struct *vma,
+			  const struct obmm_cmd_update_range *update_info);
+
 /**
  * Convert VM flags to mem state
  */
@@ -53,10 +58,28 @@ static void obmm_vma_open(struct vm_area_struct *vma)
 static void obmm_vma_close(struct vm_area_struct *vma)
 {
 	struct obmm_region *reg;
+	int ret;
 
 	reg = (struct obmm_region *)vma->vm_file->private_data;
 
 	mutex_lock(&reg->state_mutex);
+	/* cc-mmap */
+	if (reg->mmap_mode == OBMM_MMAP_NORMAL && reg->ownership_info) {
+		/* flush cache */
+		struct obmm_cmd_update_range update_info = {
+			.start = vma->vm_start,
+			.end = vma->vm_end,
+			.mem_state = OBMM_SHM_MEM_NO_ACCESS,
+			.cache_ops = OBMM_SHM_CACHE_INFER,
+		};
+		ret = scan_and_flush(reg, vma, &update_info);
+		if (ret)
+			pr_err("vma close: failed to flush cache\n");
+
+		remove_mapping_permission(reg, vma, vma->vm_start, vma->vm_end);
+		release_local_state_info(vma);
+	}
+
 	reg->mmap_count--;
 	if (reg->mmap_count == 0) {
 		/* reset mmap_mode */
@@ -82,6 +105,31 @@ static int obmm_vma_mremap(struct vm_area_struct *vma __always_unused)
 	return -EOPNOTSUPP;
 }
 
+static bool validate_update_info(const struct obmm_region *region,
+				 const struct obmm_cmd_update_range *update_info,
+				 bool cacheable)
+{
+	bool valid;
+
+	if (!cacheable) {
+		pr_err("Ownership operation is not applicable to o-sync mmap %d.\n",
+		       region->regionid);
+		return false;
+	}
+	if (!region->ownership_info) {
+		pr_err("error updating ownership: ownership of memdev %d not initialized.\n",
+		       region->regionid);
+		return false;
+	}
+
+	valid = update_info->start < update_info->end &&
+		IS_ALIGNED(update_info->start, PAGE_SIZE) &&
+		IS_ALIGNED(update_info->end, PAGE_SIZE);
+	if (!valid)
+		pr_err("pid=%d, start=%#llx end=%#llx is not a valid page range for memdev %d.\n",
+		       current->pid, update_info->start, update_info->end, region->regionid);
+	return valid;
+}
 static int obmm_vma_mprotect(struct vm_area_struct *vma __always_unused,
 			     unsigned long start __always_unused,
 			     unsigned long end __always_unused,
 			     unsigned long newflags __always_unused)
@@ -322,12 +370,53 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma)
 	if (reg->mmap_mode == OBMM_MMAP_INIT)
 		reg->mmap_mode = o_sync ? OBMM_MMAP_OSYNC : OBMM_MMAP_NORMAL;
 
-	ret = map_obmm_region(vma, reg, mmap_granu);
-	if (ret) {
-		pr_err("Failed to mmap region %d. ret=%pe\n", reg->regionid, ERR_PTR(ret));
-		goto reset_cur_osync;
+	/* cc mmap */
+	if (reg->mmap_mode == OBMM_MMAP_NORMAL) {
+		if (mmap_granu == OBMM_MMAP_GRANU_PAGE) {
+			ret = init_local_state_info(vma, mem_state);
+			if (ret) {
+				pr_err("init local state info failed: %pe\n", ERR_PTR(ret));
+				goto reset_cur_osync;
+			}
+		}
+		/*
+		 * initialize region-level ownership info if not done yet.
+		 * once initialized, the OBMM ownership will persist until
+		 * the memdev goes offline.
+		 */
+		ret = init_ownership_info(reg);
+		if (ret)
+			goto err_release_local_state_info;
+		ret = check_mmap_allowed(reg, vma, mem_state);
+		if (ret)
+			goto err_release_local_state_info;
+
+		ret = map_obmm_region(vma, reg, mmap_granu);
+		if (ret) {
+			pr_err("Failed to mmap region %d. ret=%pe\n", reg->regionid, ERR_PTR(ret));
+			goto err_release_local_state_info;
+		}
+		if (mmap_granu == OBMM_MMAP_GRANU_PAGE)
+			add_mapping_permission(reg, vma, mem_state);
+	} else {
+		/* cc-region with nc-mmap(o-sync) */
+		ret = map_obmm_region(vma, reg, mmap_granu);
+		if (ret) {
+			pr_err("Failed to mmap region %d. ret=%pe\n", reg->regionid, ERR_PTR(ret));
+			goto reset_cur_osync;
+		}
 	}
-
 	reg->mmap_count++;
 	mutex_unlock(&reg->state_mutex);
 
 	/*
@@ -350,6 +439,9 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma)
 
 	return 0;
 
+err_release_local_state_info:
+	if (mmap_granu == OBMM_MMAP_GRANU_PAGE)
+		release_local_state_info(vma);
 reset_cur_osync:
 	if (old_mmap_mode == OBMM_MMAP_INIT)
 		reg->mmap_mode = OBMM_MMAP_INIT;
@@ -358,9 +450,418 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma)
 	return ret;
 }
 
+/*
+ * Verify whether mem_state is valid.
+ */
+static bool validate_state(uint8_t mem_state)
+{
+	if (mem_state & ~(OBMM_SHM_MEM_CACHE_MASK | OBMM_SHM_MEM_ACCESS_MASK)) {
+		pr_err("Invalid mem_state: %#x", mem_state);
+		return false;
+	}
+
+	/* validate cacheability field */
+	if ((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_CACHE_RESV) {
+		pr_err("Invalid mem_state: %#x -- reserved cacheability", mem_state);
+		return false;
+	}
+	/* currently no need to validate access permission field */
+
+	if (((mem_state & OBMM_SHM_MEM_ACCESS_MASK) == OBMM_SHM_MEM_READEXEC) &&
+	    (((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_DEVICE) ||
+	     (mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL_NC)) {
+		pr_err("Bad target mem_state configuration: NC memory cannot be executable\n");
+		return false;
+	}
+
+	if (((mem_state & OBMM_SHM_MEM_CACHE_MASK) == OBMM_SHM_MEM_NORMAL_NC) &&
+	    ((mem_state & OBMM_SHM_MEM_ACCESS_MASK) != OBMM_SHM_MEM_NO_ACCESS)) {
+		pr_err("Invalid mem_state: a Normal-NC state must be combined with NO_ACCESS.\n");
+		return false;
+	}
+
+	return true;
+}
+
+static bool validate_cache_ops(uint8_t cache_ops)
+{
+	if (cache_ops != OBMM_SHM_CACHE_NONE &&
+	    cache_ops != OBMM_SHM_CACHE_INVAL &&
+	    cache_ops != OBMM_SHM_CACHE_WB_INVAL) {
+		pr_err("Invalid cache operations: 0x%x\n", cache_ops);
+		return false;
+	}
+	return true;
+}
+
+static int update_pte_prot(pte_t *ptep, unsigned long addr __always_unused, void *data)
+{
+	pgprot_t *pgprot = (pgprot_t *)data;
+	pte_t ptent_old, ptent_new;
+
+	ptent_old = ptep_get(ptep);
+
+	ptent_new = pfn_pte(pte_pfn(ptent_old), *pgprot);
+	if (pte_special(ptent_old))
+		ptent_new = pte_mkspecial(ptent_new);
+
+	set_pte(ptep, ptent_new);
+	return 0;
+}
+
+static void log_ownership_change(struct obmm_region *reg, uint64_t start, uint64_t end,
+				 uint8_t mem_state, uint8_t cache_ops)
+{
+	pr_debug("obmm memory %d ownership change: pid=%d start=%#llx end=%#llx mem_state=%u cache_ops=%u\n",
+		 reg->regionid, current->pid, start, end, mem_state, cache_ops);
+}
+
+/* the caller holds mm mmap lock */
+static long update_region_page_range(const struct obmm_cmd_update_range *update_info)
+{
+	int ret;
+	pgprot_t pgprot;
+
+	/* decide new page protection properties */
+	pgprot = mem_state_to_pgprot(update_info->mem_state);
+
+	/*
+	 * we currently do not update VMA properties. Instead we manipulate the
+	 * page table entries directly: VMA-level manipulation is not
+	 * preferable because the users want to have page-level control.
+	 * Sub-VMA manipulations, which involve frequent merges and splits,
+	 * require significant effort and are left as future work.
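+	 * Concretely, apply_to_page_range() below walks [start, end) and
+	 * rewrites every PTE through update_pte_prot(), and the TLB is
+	 * flushed once afterwards for the whole range.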
+ */ + + pr_debug("changing pgtable pgprot to 0x%llx: pid=%d start=0x%llx end=0x%llx\n", + pgprot_val(pgprot), current->pid, update_info->start, update_info->end); + /* not sure whether this part MUST be protected by the write lock */ + ret = apply_to_page_range(current->mm, update_info->start, + update_info->end - update_info->start, update_pte_prot, &pgprot); + if (ret) { + pr_err("failed to change pgprot to 0x%llx: pid=%d start=0x%llx end=0x%llx\n", + pgprot_val(pgprot), current->pid, update_info->start, update_info->end); + return ret; + } + pr_debug("user pgtable updated\n"); + obmm_flush_tlb(current->mm); + pr_debug("TLB flushed\n"); + + return 0; +} + +static bool validate_vma_attrs(struct vm_area_struct *vma, struct file *file, + const struct obmm_cmd_update_range *update_info) +{ + if (!vma) { + pr_err("vma not found for update range: start=%#llx end=%#llx.\n", + update_info->start, update_info->end); + return false; + } + if (vma->vm_file == NULL || file == NULL || + vma->vm_file->private_data != file->private_data) { + pr_err("VA range [%#llx, %#llx) is not a mapping of the target memdev.\n", + update_info->start, update_info->end); + return false; + } + if (update_info->start < vma->vm_start || update_info->end > vma->vm_end) { + pr_err("invalid update range: request [%#llx, %#llx), full range [%#lx, %#lx)\n", + update_info->start, update_info->end, vma->vm_start, vma->vm_end); + return false; + } + return true; +} + +struct scan_context { + struct obmm_region *reg; + struct obmm_local_state_info *local_state_info; + unsigned long vma_start; + uint8_t target_mem_state; + uint8_t range_mem_state; + unsigned long local_page_idx; + unsigned long page_count; +}; + +static int do_scan_region_and_flush(struct scan_context *ctx, unsigned long region_page_idx_start, + unsigned long idx_offset_start, unsigned long idx_offset, + bool is_read) +{ + uint8_t cache_ops; + unsigned long phys_offset, size; + + cache_ops = is_read ? OBMM_SHM_CACHE_INVAL : OBMM_SHM_CACHE_WB_INVAL; + phys_offset = (region_page_idx_start + idx_offset_start) << PAGE_SHIFT; + size = (idx_offset - idx_offset_start) << PAGE_SHIFT; + return obmm_region_flush_range(ctx->reg, phys_offset, size, cache_ops); +} + +/* + * Scan the global permission count and flush the cache + * for intervals where the read permission count is 1 + * and write permission count is 0. 
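+ * For the write-back case (is_read == false) the flushed intervals are
+ * instead those where the write permission count is exactly 1. Example
+ * (read case): with per-page (read, write) counts (1,0) (1,0) (2,0) (1,0),
+ * the ranges [0, 2) and [3, 4) are flushed, while page 2 is skipped
+ * because another mapping still reads it.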
+ */ +static int scan_region_and_flush(struct scan_context *ctx, bool is_read) +{ + unsigned long idx_offset, region_page_idx_start, idx_offset_start; + struct obmm_ownership_info *info; + int ret; + uint32_t state_count, read_count, write_count; + bool start_flag, stop_flag; + + info = ctx->reg->ownership_info; + /* translate to region page idx */ + region_page_idx_start = ctx->local_page_idx + ctx->local_state_info->orig_pgoff; + + idx_offset_start = -1; + for (idx_offset = 0; idx_offset < ctx->page_count; idx_offset++) { + state_count = info->mem_state_arr[region_page_idx_start + idx_offset]; + read_count = GET_R_COUNTER(state_count); + write_count = GET_W_COUNTER(state_count); + + if (is_read) { + start_flag = (write_count == 0 && read_count == 1); + stop_flag = (write_count != 0 || read_count != 1); + } else { + start_flag = (write_count == 1); + stop_flag = (write_count != 1); + } + + if (start_flag && idx_offset_start == -1) { + idx_offset_start = idx_offset; + } else if (stop_flag && idx_offset_start != -1) { + /* flush the range [idx_offset_start, idx_offset) */ + ret = do_scan_region_and_flush(ctx, region_page_idx_start, idx_offset_start, + idx_offset, is_read); + if (ret) + return ret; + idx_offset_start = -1; + } + } + /* check if there is a range not flushed */ + if (idx_offset_start != -1) { + ret = do_scan_region_and_flush(ctx, region_page_idx_start, idx_offset_start, + idx_offset, is_read); + if (ret) + return ret; + } + return 0; +} + +static int do_scan_and_flush(struct scan_context *ctx) +{ + int ret; + uint8_t cache_ops; + unsigned long size, vm_start; + + cache_ops = infer_cache_ops(ctx->range_mem_state, ctx->target_mem_state); + vm_start = ctx->vma_start + (ctx->local_page_idx << PAGE_SHIFT); + size = (unsigned long)ctx->page_count << PAGE_SHIFT; + + log_ownership_change(ctx->reg, vm_start, vm_start + size, ctx->target_mem_state, cache_ops); + if (cache_ops == OBMM_SHM_CACHE_NONE) { + /* ignore none ops */ + ret = 0; + } else if (cache_ops == OBMM_SHM_CACHE_WB_INVAL || cache_ops == OBMM_SHM_CACHE_WB_ONLY) { + /* may need to split and flush */ + ret = scan_region_and_flush(ctx, false); + } else { + /* may need to split and flush */ + ret = scan_region_and_flush(ctx, true); + } + return ret; +} + +/* + * Scan pages in a range and flush pages which are not in use. + * The caller holds region state_mutex lock. 
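+ * The range is split into maximal runs of pages that share the same local
+ * mem_state; each run is handed to do_scan_and_flush(), which infers the
+ * cache operation from the (current state, target state) transition.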
+ */ +static int scan_and_flush(struct obmm_region *reg, struct vm_area_struct *vma, + const struct obmm_cmd_update_range *update_info) +{ + struct obmm_local_state_info *local_state_info; + int idx_offset, page_count, local_page_idx_start, idx_offset_start; + uint8_t mem_state_start, mem_state; + struct scan_context ctx; + int ret; + + page_count = (update_info->end - update_info->start) >> PAGE_SHIFT; + + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + local_page_idx_start = vma_addr_to_page_idx_local(vma, update_info->start); + + ctx.reg = reg; + ctx.local_state_info = local_state_info; + ctx.vma_start = vma->vm_start; + ctx.target_mem_state = update_info->mem_state; + + idx_offset_start = 0; + mem_state_start = local_state_info->local_mem_state_arr[local_page_idx_start]; + for (idx_offset = 1; idx_offset < page_count; idx_offset++) { + mem_state = + local_state_info->local_mem_state_arr[local_page_idx_start + idx_offset]; + if (mem_state == mem_state_start) + continue; + + ctx.range_mem_state = mem_state_start; + ctx.local_page_idx = local_page_idx_start + idx_offset_start; + ctx.page_count = idx_offset - idx_offset_start; + + ret = do_scan_and_flush(&ctx); + if (ret) + return ret; + + idx_offset_start = idx_offset; + mem_state_start = mem_state; + } + + ctx.range_mem_state = mem_state_start; + ctx.local_page_idx = local_page_idx_start + idx_offset_start; + ctx.page_count = idx_offset - idx_offset_start; + ret = do_scan_and_flush(&ctx); + return ret; +} + +static void print_update_param(const struct obmm_cmd_update_range *update_info) +{ + pr_debug("obmm_set_ownership: pid=%d va=[%#llx, %#llx) mem_state=%#x cache_ops=%#x\n", + current->pid, update_info->start, update_info->end, update_info->mem_state, + update_info->cache_ops); +} + +static bool validate_ownership_perm(struct file *file, + const struct obmm_cmd_update_range *update_info) +{ + uint8_t access_param = update_info->mem_state & OBMM_SHM_MEM_ACCESS_MASK; + vm_flags_t tmp_vmflags = VM_NONE; + + if (access_param == OBMM_SHM_MEM_READONLY) + tmp_vmflags |= VM_READ; + if (access_param == OBMM_SHM_MEM_READWRITE) + tmp_vmflags |= (VM_READ | VM_WRITE); + if (access_param == OBMM_SHM_MEM_READEXEC) + tmp_vmflags |= (VM_READ | VM_EXEC); + return validate_perm(file, tmp_vmflags); +} + +static long obmm_shm_update_range(struct file *file, + const struct obmm_cmd_update_range *update_info) +{ + int ret; + unsigned long phys_offset; + struct obmm_region *reg = (struct obmm_region *)file->private_data; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct obmm_local_state_info *local_state_info; + uint8_t cache_ops; + bool cacheable; + + print_update_param(update_info); + + if (file->f_flags & O_SYNC) + cacheable = false; + else + cacheable = true; + /* quick validation without VMA info. 
*/ + if (!validate_update_info(reg, update_info, cacheable)) + return -EINVAL; + + if (!validate_ownership_perm(file, update_info)) { + pr_err("The target permission is not allowed for the vma.\n"); + return -EPERM; + } + + if (!validate_state(update_info->mem_state)) + return -EINVAL; + + if (update_info->cache_ops != OBMM_SHM_CACHE_INFER) { + /* validate cache operations */ + if (!validate_cache_ops(update_info->cache_ops)) + return -EINVAL; + } + + mmap_read_lock(mm); + + vma = find_vma(mm, update_info->start); + if (!validate_vma_attrs(vma, file, update_info)) { + ret = -EFAULT; + goto err_unlock; + } + + local_state_info = (struct obmm_local_state_info *)vma->vm_private_data; + + mutex_lock(®->state_mutex); + + ret = check_modify_ownership_allowed(reg, vma, update_info); + if (ret) { + pr_err("check range (%llx-%llx) ownership failed: %d\n", update_info->start, + update_info->end, ret); + goto err_mutex; + } + + ret = update_region_page_range(update_info); + if (ret) + goto err_mutex; + /* + * If the user specifies a cache operation, we perform the operation + * on the range specified by update_info. Otherwise, + * we dynamically calculate whether the cache operation is needed. + */ + if (update_info->cache_ops != OBMM_SHM_CACHE_INFER) { + cache_ops = update_info->cache_ops; + log_ownership_change(reg, update_info->start, update_info->end, + update_info->mem_state, cache_ops); + /* conditionally flush L3 cache & ub controller packet queue */ + phys_offset = update_info->start - vma->vm_start + + (local_state_info->orig_pgoff << PAGE_SHIFT); + ret = obmm_region_flush_range(reg, phys_offset, + update_info->end - update_info->start, cache_ops); + } else { + ret = scan_and_flush(reg, vma, update_info); + } + + if (ret) { + /* original ownership has been lost. */ + pr_err("ownership update: failed to flush cache, ret=%pe. 
not recoverable.\n", + ERR_PTR(ret)); + ret = -ENOTRECOVERABLE; + goto err_mutex; + } + update_ownership(reg, vma, update_info); + + mutex_unlock(®->state_mutex); + mmap_read_unlock(mm); + + pr_debug("obmm_set_ownership: completed.\n"); + return 0; + +err_mutex: + mutex_unlock(®->state_mutex); +err_unlock: + mmap_read_unlock(mm); + return ret; +} + static long obmm_shm_fops_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return -ENOTTY; + long ret; + + switch (cmd) { + case OBMM_SHMDEV_UPDATE_RANGE: { + struct obmm_cmd_update_range cmd_update_range; + + ret = (long)copy_from_user(&cmd_update_range, (void __user *)arg, + sizeof(struct obmm_cmd_update_range)); + if (ret) { + pr_err("failed to load update_range argument"); + return -EFAULT; + } + + ret = obmm_shm_update_range(file, &cmd_update_range); + } break; + default: + ret = -ENOTTY; + } + return ret; } const struct file_operations obmm_shm_fops = { .owner = THIS_MODULE, diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h index 0988d3d9c56b..7c2b29e3df73 100644 --- a/include/uapi/ub/obmm.h +++ b/include/uapi/ub/obmm.h @@ -150,6 +150,16 @@ struct obmm_cmd_unimport { */ #define OBMM_SHM_CACHE_INFER 0x4 +struct obmm_cmd_update_range { + /* address range to manipulate: [start, end) */ + __u64 start; + __u64 end; + __u8 mem_state; + __u8 cache_ops; +} __attribute__((aligned(8))); + +#define OBMM_SHMDEV_UPDATE_RANGE _IOW('X', 0, struct obmm_cmd_update_range) + struct obmm_cmd_preimport { __u64 pa; __u64 length; -- Gitee From f6876f29e8f00f5febe8ff8154cd6fdbceec0e57 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:42 +0800 Subject: [PATCH 21/48] obmm: Add low memory management support commit f6bc214461e913ec9380b6f692a26e05a3bb276e openEuler When system memory is insufficient, obmm will return memory from the memory pool to the system to prevent out-of-memory conditions. This helps maintain system stability by ensuring that critical system processes have access to memory when needed, rather than letting the system reach a state where the OOM killer is invoked. 
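As a minimal sketch of the flow this patch wires up (using the openEuler
reclaim-notifier API that the driver below registers with; pool_shrink() is a
hypothetical helper standing in for ubmempool_contract()):

	static int pool_lowmem_handler(struct notifier_block *nb,
				       unsigned long unused, void *parm)
	{
		struct reclaim_notify_data *data = parm;
		int i;

		/* report back how many pages were returned to the system */
		data->nr_freed = 0;
		for (i = 0; i < data->nr_nid; i++)
			data->nr_freed += pool_shrink(data->nid[i]) >> PAGE_SHIFT;
		return 0;
	}

	static struct notifier_block pool_nb = {
		.notifier_call = pool_lowmem_handler,
	};

	/* at init time: register_reclaim_notifier(&pool_nb); */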
Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 2 +- drivers/ub/obmm/obmm_core.c | 10 ++++++ drivers/ub/obmm/obmm_lowmem.c | 58 +++++++++++++++++++++++++++++++++++ drivers/ub/obmm/obmm_lowmem.h | 12 ++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 drivers/ub/obmm/obmm_lowmem.c create mode 100644 drivers/ub/obmm/obmm_lowmem.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index dbbcfe7cf5fb..89524d84d7fb 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -5,7 +5,7 @@ obmm-y := obmm_core.o \ obmm_shm_dev.o obmm_cache.o obmm_import.o \ obmm_export_from_user.o obmm_ownership.o conti_mem_allocator.o \ obmm_export.o obmm_export_from_pool.o obmm_preimport.o \ - ubmempool_allocator.o obmm_export_region_ops.o \ + ubmempool_allocator.o obmm_lowmem.o obmm_export_region_ops.o \ obmm_addr_check.o obmm_preimport_prefilled.o obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index 94e25b7de179..e7247e8b34cc 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -26,6 +26,7 @@ #include "ubmempool_allocator.h" #include "obmm_import.h" #include "obmm_ownership.h" +#include "obmm_lowmem.h" #include "obmm_preimport.h" #include "obmm_addr_check.h" #include "obmm_export.h" @@ -602,9 +603,17 @@ static int __init obmm_init(void) goto out_addr_check_exit; } + ret = lowmem_notify_init(); + if (ret) { + pr_err("failed to initialize lowmem handler. ret=%pe\n", ERR_PTR(ret)); + goto out_module_import_exit; + } + pr_info("obmm_module: init completed\n"); return ret; +out_module_import_exit: + module_preimport_exit(); out_addr_check_exit: module_addr_check_exit(); obmm_shm_dev_exit(); @@ -619,6 +628,7 @@ static void __exit obmm_exit(void) { pr_info("obmm_module: exit started\n"); + lowmem_notify_exit(); module_preimport_exit(); module_addr_check_exit(); obmm_shm_dev_exit(); diff --git a/drivers/ub/obmm/obmm_lowmem.c b/drivers/ub/obmm/obmm_lowmem.c new file mode 100644 index 000000000000..1d8cdeaa2845 --- /dev/null +++ b/drivers/ub/obmm/obmm_lowmem.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ubmempool_allocator.h" +#include "obmm_lowmem.h" + +static struct notifier_block lowmem_nb; +#define LOWMEM_NOTIFY_PRIORITY 80 + +/* May be called by lowmem notifier at a very high frequency. */ +static int obmm_lowmem_notify_handler(struct notifier_block *nb __always_unused, + unsigned long dummy __always_unused, void *parm) +{ + struct reclaim_notify_data *data = parm; + bool is_huge = false; + int i; + + pr_debug_ratelimited("got lowmem message. 
pid=%d sync=%d reason=%u\n", current->pid, + data->sync, data->reason); + + if (data->reason != RR_DIRECT_RECLAIM && + data->reason != RR_KSWAPD && + data->reason != RR_HUGEPAGE_RECLAIM) + return -ENOMEM; + + if (data->reason == RR_HUGEPAGE_RECLAIM) + is_huge = true; + data->nr_freed = 0; + for (i = 0; i < data->nr_nid; i++) { + pr_debug_ratelimited("contract memory on nid: %d\n", data->nid[i]); + data->nr_freed += ubmempool_contract(data->nid[i], is_huge) >> PAGE_SHIFT; + } + + return 0; +} + +int lowmem_notify_init(void) +{ + lowmem_nb.notifier_call = obmm_lowmem_notify_handler; + lowmem_nb.priority = LOWMEM_NOTIFY_PRIORITY; + return register_reclaim_notifier(&lowmem_nb); +} + +void lowmem_notify_exit(void) +{ + unregister_reclaim_notifier(&lowmem_nb); +} diff --git a/drivers/ub/obmm/obmm_lowmem.h b/drivers/ub/obmm/obmm_lowmem.h new file mode 100644 index 000000000000..7da339c6aa05 --- /dev/null +++ b/drivers/ub/obmm/obmm_lowmem.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ +#ifndef OBMM_LOW_MEM_H +#define OBMM_LOW_MEM_H + +int lowmem_notify_init(void); +void lowmem_notify_exit(void); + +#endif -- Gitee From 1d9848b633759eb86efea9a586d0efde5e4a7c9a Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Fri, 7 Nov 2025 17:45:43 +0800 Subject: [PATCH 22/48] obmm: Add sysfs interface for configuration and monitoring commit ac654ff4a9ebb99ff7666090c1e5c49c138b7d32 openEuler Add sysfs information interfaces for each obmm_shmdev device to provide detailed memory properties. The information is categorized into three types based on their usage scenarios: * Common information: Properties shared by both providers and users, located in the root sysfs path, such as memory length and type. * Provider information: Provider-specific properties, located in the 'export_info' subdirectory, like memory distribution across local NUMA nodes. * User information: User-specific properties, located in the 'import_info' subdirectory, such as physical addresses after remote mapping. Some properties are only available when specific conditions are met. For example, import_info/numa_id is only present when imported in NUMA mode. 
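For illustration, each attribute is a plain text file, so a user-space
consumer can read it directly. A minimal sketch (the sysfs path below is
hypothetical; the real path depends on where the obmm root device is
registered):

	#include <stdio.h>

	int main(void)
	{
		char buf[32];
		FILE *f = fopen("/sys/devices/obmm/memid_1/import_info/numa_id", "r");

		if (!f)
			return 1;
		if (fgets(buf, sizeof(buf), f))
			printf("imported on NUMA node %s", buf);
		fclose(f);
		return 0;
	}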
Signed-off-by: Li Ruilin Signed-off-by: Gao Chao Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 2 +- drivers/ub/obmm/obmm_core.c | 1 + drivers/ub/obmm/obmm_shm_dev.c | 2 + drivers/ub/obmm/obmm_sysfs.h | 13 ++ drivers/ub/obmm/obmm_sysfs_ub.c | 275 ++++++++++++++++++++++++++++++++ 5 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 drivers/ub/obmm/obmm_sysfs.h create mode 100644 drivers/ub/obmm/obmm_sysfs_ub.c diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index 89524d84d7fb..dc276760dfba 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0+ obmm-y := obmm_core.o \ - obmm_shm_dev.o obmm_cache.o obmm_import.o \ + obmm_shm_dev.o obmm_sysfs_ub.o obmm_cache.o obmm_import.o \ obmm_export_from_user.o obmm_ownership.o conti_mem_allocator.o \ obmm_export.o obmm_export_from_pool.o obmm_preimport.o \ ubmempool_allocator.o obmm_lowmem.o obmm_export_region_ops.o \ diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index e7247e8b34cc..ded614003595 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -29,6 +29,7 @@ #include "obmm_lowmem.h" #include "obmm_preimport.h" #include "obmm_addr_check.h" +#include "obmm_sysfs.h" #include "obmm_export.h" #include "obmm_core.h" diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c index aea000dc42ac..f651399e712d 100644 --- a/drivers/ub/obmm/obmm_shm_dev.c +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -10,6 +10,7 @@ #include #include "obmm_cache.h" +#include "obmm_sysfs.h" #include "obmm_export_region_ops.h" #include "obmm_import.h" #include "obmm_ownership.h" @@ -897,6 +898,7 @@ int obmm_shm_dev_add(struct obmm_region *reg) reg->cdevice.owner = THIS_MODULE; reg->device.devt = devt; reg->device.release = obmm_shm_dev_release; + reg->device.groups = obmm_region_get_attr_groups(reg); reg->device.parent = obmm_shm_rootdev; device_initialize(®->device); diff --git a/drivers/ub/obmm/obmm_sysfs.h b/drivers/ub/obmm/obmm_sysfs.h new file mode 100644 index 000000000000..d661b37c406e --- /dev/null +++ b/drivers/ub/obmm/obmm_sysfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + */ + +#ifndef OBMM_SYSFS_H +#define OBMM_SYSFS_H + +#include "obmm_core.h" + +const struct attribute_group **obmm_region_get_attr_groups(const struct obmm_region *); + +#endif diff --git a/drivers/ub/obmm/obmm_sysfs_ub.c b/drivers/ub/obmm/obmm_sysfs_ub.c new file mode 100644 index 000000000000..22f8702f6468 --- /dev/null +++ b/drivers/ub/obmm/obmm_sysfs_ub.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + * Description:OBMM Framework's implementations. + */ + +#include +#include + +#include "obmm_sysfs.h" +#include "obmm_preimport.h" +#include "obmm_import.h" + +static ssize_t size_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + struct obmm_region *region; + + region = container_of(dev, struct obmm_region, device); + return sysfs_emit(buf, "0x%llx\n", region->mem_size); +} +static DEVICE_ATTR_ADMIN_RO(size); + +static const char *get_type_str(const struct obmm_region *region) +{ + return region->type == OBMM_EXPORT_REGION ? 
"export" : "import"; +} + +/* show some attribute of a region as string */ +#define REGION_ATTR_SHOW(tag) \ + static ssize_t tag##_show(struct device *dev, \ + struct device_attribute *attr __always_unused, char *buf) \ + { \ + struct obmm_region *region; \ + region = container_of(dev, struct obmm_region, device); \ + return sysfs_emit(buf, "%s\n", get_##tag##_str(region)); \ + } \ + static DEVICE_ATTR_ADMIN_RO(tag) + +REGION_ATTR_SHOW(type); + +static ssize_t priv_len_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + struct obmm_region *region; + + region = container_of(dev, struct obmm_region, device); + return sysfs_emit(buf, "%u\n", region->priv_len); +} +static DEVICE_ATTR_ADMIN_RO(priv_len); + +/* binary attribute of the sysfs entry for priv data */ +static ssize_t priv_read(struct file *filp __always_unused, struct kobject *kobj, + struct bin_attribute *bin_attr __always_unused, char *buf, loff_t off, + size_t count) +{ + struct device *dev; + struct obmm_region *region; + + dev = kobj_to_dev(kobj); + region = container_of(dev, struct obmm_region, device); + + if (off + count > OBMM_MAX_PRIV_LEN) + count = OBMM_MAX_PRIV_LEN - off; + memcpy(buf, region->priv + off, count); + + return count; +} + +static struct bin_attribute bin_attr_priv __ro_after_init = { + .attr = { + .name = "priv", + .mode = 0400, + }, + .read = priv_read, + .size = OBMM_MAX_PRIV_LEN, +}; + +/* show some attribute of a region as string */ +#define REGION_FLAG_SHOW(flag) \ + static ssize_t flag##_show(struct device *dev, \ + struct device_attribute *attr __always_unused, char *buf) \ + { \ + struct obmm_region *region; \ + region = container_of(dev, struct obmm_region, device); \ + return sysfs_emit(buf, "%d\n", region_##flag(region)); \ + } \ + static DEVICE_ATTR_ADMIN_RO(flag) + +REGION_FLAG_SHOW(allow_mmap); +REGION_FLAG_SHOW(memory_from_user); +REGION_FLAG_SHOW(preimport); + +/* for export region only */ +static ssize_t node_mem_size_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + unsigned int i; + ssize_t count; + struct obmm_region *reg; + struct obmm_export_region *e_reg; + + reg = container_of(dev, struct obmm_region, device); + e_reg = container_of(reg, struct obmm_export_region, region); + + count = sysfs_emit(buf, "%#llx", e_reg->node_mem_size[0]); + for (i = 1; i < e_reg->node_count; i++) + count += sysfs_emit_at(buf, count, ",%#llx", e_reg->node_mem_size[i]); + count += sysfs_emit_at(buf, count, "\n"); + return count; +} +static DEVICE_ATTR_ADMIN_RO(node_mem_size); + +static ssize_t deid_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + struct obmm_import_region *i_reg; + struct obmm_export_region *e_reg; + struct obmm_region *reg; + + reg = container_of(dev, struct obmm_region, device); + if (reg->type == OBMM_EXPORT_REGION) { + e_reg = container_of(reg, struct obmm_export_region, region); + return sysfs_emit(buf, EID_FMT64 "\n", EID_ARGS64_H(e_reg->deid), + EID_ARGS64_L(e_reg->deid)); + } + i_reg = container_of(reg, struct obmm_import_region, region); + return sysfs_emit(buf, EID_FMT64 "\n", EID_ARGS64_H(i_reg->deid), + EID_ARGS64_L(i_reg->deid)); +} +static DEVICE_ATTR_ADMIN_RO(deid); + +static ssize_t seid_show(struct device *dev, struct device_attribute *attr __always_unused, + char *buf) +{ + struct obmm_import_region *i_reg; + struct obmm_region *reg; + + reg = container_of(dev, struct obmm_region, device); + i_reg = container_of(reg, struct obmm_import_region, region); + + 
return sysfs_emit(buf, EID_FMT64 "\n", EID_ARGS64_H(i_reg->seid),
+			  EID_ARGS64_L(i_reg->seid));
+}
+static DEVICE_ATTR_ADMIN_RO(seid);
+
+#define COMMON_FIELD_SHOW(field, fmt) \
+	static ssize_t field##_show(struct device *dev, \
+				    struct device_attribute *attr __always_unused, char *buf) \
+	{ \
+		struct obmm_region *reg; \
+		struct obmm_export_region *e_reg; \
+		reg = container_of(dev, struct obmm_region, device); \
+		e_reg = container_of(reg, struct obmm_export_region, region); \
+		return sysfs_emit(buf, fmt, e_reg->field); \
+	} \
+	static DEVICE_ATTR_ADMIN_RO(field)
+
+COMMON_FIELD_SHOW(tokenid, "0x%x\n");
+COMMON_FIELD_SHOW(uba, "0x%llx\n");
+
+#define IREG_FIELD_SHOW(field, fmt) \
+	static ssize_t field##_show(struct device *dev, \
+				    struct device_attribute *attr __always_unused, char *buf) \
+	{ \
+		struct obmm_region *reg; \
+		struct obmm_import_region *i_reg; \
+		reg = container_of(dev, struct obmm_region, device); \
+		i_reg = container_of(reg, struct obmm_import_region, region); \
+		return sysfs_emit(buf, fmt, i_reg->field); \
+	} \
+	static DEVICE_ATTR_ADMIN_RO(field)
+
+IREG_FIELD_SHOW(pa, "0x%llx\n");
+IREG_FIELD_SHOW(numa_id, "%d\n");
+IREG_FIELD_SHOW(dcna, "0x%x\n");
+IREG_FIELD_SHOW(scna, "0x%x\n");
+
+static struct attribute *root_attrs[] __ro_after_init = {
+	&dev_attr_size.attr,
+	&dev_attr_type.attr,
+	&dev_attr_priv_len.attr,
+	&dev_attr_allow_mmap.attr,
+	NULL,
+};
+
+static struct bin_attribute *root_bin_attrs[] __ro_after_init = {
+	&bin_attr_priv,
+	NULL,
+};
+
+static struct attribute *import_numa_attrs[] __ro_after_init = {
+	&dev_attr_numa_id.attr,
+	&dev_attr_pa.attr,
+	&dev_attr_dcna.attr,
+	&dev_attr_scna.attr,
+	&dev_attr_preimport.attr,
+	&dev_attr_seid.attr,
+	&dev_attr_deid.attr,
+	NULL,
+};
+static struct attribute *import_mmap_attrs[] __ro_after_init = {
+	&dev_attr_pa.attr,
+	&dev_attr_dcna.attr,
+	&dev_attr_scna.attr,
+	&dev_attr_seid.attr,
+	&dev_attr_deid.attr,
+	NULL,
+};
+
+static struct attribute *export_attrs[] __ro_after_init = {
+	&dev_attr_node_mem_size.attr,
+	&dev_attr_uba.attr,
+	&dev_attr_tokenid.attr,
+	&dev_attr_memory_from_user.attr,
+	&dev_attr_deid.attr,
+	NULL,
+};
+
+static struct attribute_group root_attrs_group __ro_after_init = {
+	.name = NULL,
+	.attrs = root_attrs,
+	.bin_attrs = root_bin_attrs,
+};
+
+#define SYSFS_NUMA_REMOTE 1U
+
+static unsigned int get_import_region_sysfs_index(const struct obmm_region *region)
+{
+	unsigned int index = 0;
+
+	if (region_numa_remote(region))
+		index |= SYSFS_NUMA_REMOTE;
+
+	return index;
+}
+
+static const struct attribute_group import_attrs_groups[] = {
+	[0] = {
+		.name = "import_info",
+		.attrs = import_mmap_attrs,
+	},
+	[SYSFS_NUMA_REMOTE] = {
+		.name = "import_info",
+		.attrs = import_numa_attrs,
+	},
+};
+
+static const struct attribute_group export_attrs_group = {
+	.name = "export_info",
+	.attrs = export_attrs,
+};
+
+/* one row per index that get_import_region_sysfs_index() can return */
+static const struct attribute_group *obmm_import_attrs_groups_list[][3] = {
+	{ &root_attrs_group, &import_attrs_groups[0], NULL },
+	{ &root_attrs_group, &import_attrs_groups[SYSFS_NUMA_REMOTE], NULL },
+};
+
+static const struct attribute_group *obmm_export_attrs_groups[] = {
+	&root_attrs_group,
+	&export_attrs_group,
+	NULL,
+};
+
+const struct attribute_group **obmm_region_get_attr_groups(const struct obmm_region *region)
+{
+	unsigned int index;
+
+	if (region->type == OBMM_EXPORT_REGION)
+		return obmm_export_attrs_groups;
+	index =
get_import_region_sysfs_index(region); + return obmm_import_attrs_groups_list[index]; +} -- Gitee From fc7453996bb70d0e6c06ce90f04a380b384d2c8d Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Tue, 18 Nov 2025 16:31:57 +0800 Subject: [PATCH 23/48] obmm: Add resource management support for imported memory commit ac78ffc6e0c327b5e51eea5b073036e6096fd329 openEuler This commit introduces resource management functionality to the OBMM framework, allowing proper tracking and management of imported memory regions in the kernel's resource tree. This ensures that imported memory regions are properly registered in the kernel's resource management system, preventing conflicts and enabling better system visibility of these memory resources. Signed-off-by: Li Ruilin Signed-off-by: yuhao_zhang --- drivers/ub/obmm/Makefile | 2 +- drivers/ub/obmm/obmm_core.h | 4 + drivers/ub/obmm/obmm_import.c | 23 +- drivers/ub/obmm/obmm_preimport.h | 3 + drivers/ub/obmm/obmm_preimport_prefilled.c | 17 ++ drivers/ub/obmm/obmm_resource.c | 244 +++++++++++++++++++++ drivers/ub/obmm/obmm_resource.h | 22 ++ 7 files changed, 312 insertions(+), 3 deletions(-) create mode 100644 drivers/ub/obmm/obmm_resource.c create mode 100644 drivers/ub/obmm/obmm_resource.h diff --git a/drivers/ub/obmm/Makefile b/drivers/ub/obmm/Makefile index dc276760dfba..e5f25776ce87 100644 --- a/drivers/ub/obmm/Makefile +++ b/drivers/ub/obmm/Makefile @@ -6,6 +6,6 @@ obmm-y := obmm_core.o \ obmm_export_from_user.o obmm_ownership.o conti_mem_allocator.o \ obmm_export.o obmm_export_from_pool.o obmm_preimport.o \ ubmempool_allocator.o obmm_lowmem.o obmm_export_region_ops.o \ - obmm_addr_check.o obmm_preimport_prefilled.o + obmm_addr_check.o obmm_preimport_prefilled.o obmm_resource.o obj-$(CONFIG_OBMM) += obmm.o diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index f353495f3034..4d844334dbad 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -41,6 +41,8 @@ extern size_t __obmm_memseg_size; #define MAX_MEMINFO_COUNT MAX_IMPORT_COUNT +struct ubmem_resource; + enum obmm_region_type { OBMM_EXPORT_REGION, OBMM_IMPORT_REGION @@ -150,6 +152,8 @@ struct obmm_import_region { u32 dcna; u32 scna; + /* resource of the PA range */ + struct ubmem_resource *ubmem_res; u64 pa; /* imported NUMA node */ diff --git a/drivers/ub/obmm/obmm_import.c b/drivers/ub/obmm/obmm_import.c index 55ef257a3c01..2875fa9e58ec 100644 --- a/drivers/ub/obmm/obmm_import.c +++ b/drivers/ub/obmm/obmm_import.c @@ -14,6 +14,7 @@ #include "obmm_cache.h" #include "obmm_import.h" #include "obmm_preimport.h" +#include "obmm_resource.h" #include "obmm_addr_check.h" static void set_import_region_datapath(const struct obmm_import_region *i_reg, @@ -45,6 +46,7 @@ static unsigned long get_pa_range_mem_cap(u32 scna, phys_addr_t pa, size_t size) static int setup_pa(struct obmm_import_region *i_reg) { + int ret; phys_addr_t start, end; struct obmm_datapath datapath; @@ -53,8 +55,19 @@ static int setup_pa(struct obmm_import_region *i_reg) if (i_reg->region.mem_cap == 0) return -EINVAL; - if (!region_preimport(&i_reg->region)) + if (!region_preimport(&i_reg->region)) { + struct ubmem_resource *ubmem_res; + + ubmem_res = setup_ubmem_resource(i_reg->pa, i_reg->region.mem_size, false); + if (IS_ERR(ubmem_res)) { + pr_err("failed to setup ubmem resource. 
pa=%pa, size=%#llx, ret=%pe\n", + &i_reg->pa, i_reg->region.mem_size, ubmem_res); + return PTR_ERR(ubmem_res); + } + i_reg->ubmem_res = ubmem_res; + return 0; + } start = i_reg->pa; end = i_reg->pa + i_reg->region.mem_size - 1; @@ -62,6 +75,12 @@ static int setup_pa(struct obmm_import_region *i_reg) return preimport_commit_prefilled(start, end, &datapath, &i_reg->numa_id, &i_reg->preimport_handle); + if (ret) + return ret; + + i_reg->ubmem_res = preimport_get_resource_prefilled(i_reg->preimport_handle); + + return 0; } /* NOTE: do not clear PA in the teardown process. Error rollback procedure may rely on it. */ @@ -70,7 +89,7 @@ static int teardown_pa(struct obmm_import_region *i_reg) bool preimport = region_preimport(&i_reg->region); if (!preimport) - return 0; + return release_ubmem_resource(i_reg->ubmem_res); /* prefilled and preimport */ return preimport_uncommit_prefilled(i_reg->preimport_handle, i_reg->pa, i_reg->pa + i_reg->region.mem_size - 1); diff --git a/drivers/ub/obmm/obmm_preimport.h b/drivers/ub/obmm/obmm_preimport.h index 2b61fa665cc7..7f7c2d3a86a5 100644 --- a/drivers/ub/obmm/obmm_preimport.h +++ b/drivers/ub/obmm/obmm_preimport.h @@ -10,6 +10,8 @@ #include "obmm_core.h" struct ub_mem_info; +struct resource; +struct ubmem_resource; struct preimport_range { int numa_id; @@ -52,5 +54,6 @@ int preimport_commit_prefilled(phys_addr_t start, phys_addr_t end, const struct obmm_datapath *datapath, int *p_numa_id, void **p_handle); int preimport_uncommit_prefilled(void *handle, phys_addr_t start, phys_addr_t end); +struct ubmem_resource *preimport_get_resource_prefilled(void *handle); #endif diff --git a/drivers/ub/obmm/obmm_preimport_prefilled.c b/drivers/ub/obmm/obmm_preimport_prefilled.c index ce27ed23851d..f06df16892bf 100644 --- a/drivers/ub/obmm/obmm_preimport_prefilled.c +++ b/drivers/ub/obmm/obmm_preimport_prefilled.c @@ -9,12 +9,14 @@ #include "obmm_preimport.h" #include "obmm_addr_check.h" +#include "obmm_resource.h" struct prefilled_preimport_range { struct preimport_range pr; spinlock_t bitmap_lock; unsigned long nbits; unsigned long *bitmap; + struct ubmem_resource *ubmem_res; }; static DEFINE_MUTEX(preimport_mutex); @@ -45,12 +47,22 @@ static int create_prefilled_preimport_range(const struct obmm_cmd_preimport *cmd return -ENOMEM; } + ppr->ubmem_res = setup_ubmem_resource(cmd->pa, cmd->length, true); + if (IS_ERR(ppr->ubmem_res)) { + pr_err("failed to setup ubmem resource on preimport. pa=%pa, size=%#llx, ret=%pe\n", + &cmd->pa, cmd->length, ppr->ubmem_res); + kfree(ppr->bitmap); + kfree(ppr); + return PTR_ERR(ppr->ubmem_res); + } + *p_ppr = ppr; return 0; } static void destroy_prefilled_preimport_range(const struct prefilled_preimport_range *ppr) { + release_ubmem_resource(ppr->ubmem_res); kfree(ppr->bitmap); kfree(ppr); } @@ -333,6 +345,11 @@ int preimport_uncommit_prefilled(void *handle, phys_addr_t start, phys_addr_t en return ret; } +struct ubmem_resource *preimport_get_resource_prefilled(void *handle) +{ + return ((struct prefilled_preimport_range *)handle)->ubmem_res; +} + void preimport_init_prefilled(void) { } diff --git a/drivers/ub/obmm/obmm_resource.c b/drivers/ub/obmm/obmm_resource.c new file mode 100644 index 000000000000..6c110c664e01 --- /dev/null +++ b/drivers/ub/obmm/obmm_resource.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright(c) Huawei Technologies Co., Ltd. 2025 All rights reserved. + * Description: OBMM Framework's implementations. 
+ *
+ * OBMM utilizes the iomem resource tree infrastructure to expose the physical address range of
+ * each OBMM memory device to other kernel components. External accessors should never modify the
+ * resource tree structure (with or without the resource lock) and should take the resource lock
+ * while traversing the resource tree edges. "walk_iomem_res_desc", declared in <linux/ioport.h>,
+ * serves as a valid accessing candidate.
+ *
+ * Resource Tree Structure:
+ *
+ * OBMM introduces two layers in the iomem resource tree:
+ *
+ * 1. The UBMEM resource: The UBMEM resource models a range of UB memory physical addresses. The
+ *    range maps, or may map, remote memory. It is always a direct child of the iomem resource
+ *    root node.
+ *
+ * 2. The OBMM memory device: The OBMM memory device resource models a UB memory physical address
+ *    range that is associated with an OBMM memory device. It is always a leaf of the iomem
+ *    resource tree.
+ *
+ * If the imported memory is managed with remote NUMA, there may be an extra interior layer
+ * between the two mentioned above. In our context we refer to it as the NUMA resource.
+ *
+ * Below is an example:
+ *
+ * (iomem_resource)
+ *   PREIMPORT_UBMEM
+ *     System RAM (Remote)
+ *       MEMID_1
+ *       MEMID_2
+ *   DIRECT_IMPORT_UBMEM
+ *     System RAM (Remote)
+ *       MEMID_3
+ *   DIRECT_IMPORT_UBMEM
+ *     MEMID_4
+ *
+ * Things become complicated when we are handling the removal of a memory device which shares the
+ * preimport UBMEM resource with memory devices that outlive it. The current NUMA remote
+ * implementation removes the "System RAM (Remote)" resource first and re-inserts the resource
+ * afterwards, so the memory devices living under it would not be preserved. Therefore it is
+ * necessary to save all the memory device descendants before shutting down that part of the
+ * preimport memory.
+ *
+ * Concurrency Notes:
+ *
+ * As mentioned at the beginning, for external accessors, everything under a UBMEM resource in
+ * the iomem_resource tree may be read with the kernel resource_lock but should never be modified
+ * (even with the lock). The only exception is the memory hotplug / NUMA remote setup process,
+ * which is triggered by OBMM itself. With this presumption it is safe for OBMM to traverse the
+ * resource tree without the kernel resource lock. By contrast, all modifications to the subtree
+ * take the kernel resource lock to avoid racing with external readers. Lastly, there is a mutex
+ * per UBMEM resource which synchronizes internal accesses to the subtree.
+ */
+
+#define pr_fmt(fmt) "OBMM: resource:" fmt
+
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include "obmm_resource.h"
+
+#define MEMID_IORES_PREFIX "MEMID_"
+
+struct ubmem_resource {
+	struct resource res;
+	bool preimport;
+
+	/* serialize the children save-restore process (only necessary for preimport range) */
+	struct mutex mutex;
+	struct resource *memdev_res_shelter;
+};
+
+struct ubmem_resource *setup_ubmem_resource(phys_addr_t pa, resource_size_t size, bool preimport)
+{
+	int ret;
+	struct ubmem_resource *ubmem_res;
+
+	ubmem_res = kzalloc(sizeof(struct ubmem_resource), GFP_KERNEL);
+	if (!ubmem_res)
+		return ERR_PTR(-ENOMEM);
+
+	ubmem_res->res.start = pa;
+	ubmem_res->res.end = pa + size - 1;
+	ubmem_res->res.name = preimport ?
"PREIMPORT_UBMEM" : "DIRECT_IMPORT_UBMEM"; + ubmem_res->res.flags = IORESOURCE_MEM; + + ubmem_res->preimport = preimport; + mutex_init(&ubmem_res->mutex); + + ret = insert_resource(&iomem_resource, &ubmem_res->res); + if (ret) { + kfree(ubmem_res); + return ERR_PTR(ret); + } + return ubmem_res; +} + +int release_ubmem_resource(struct ubmem_resource *ubmem_res) +{ + int ret; + + ret = remove_resource(&ubmem_res->res); + if (ret) + return ret; + mutex_destroy(&ubmem_res->mutex); + kfree(ubmem_res); + return 0; +} + +/* + * Move memdev_res saved in the sheltered list back under the refreshed NUMA resource. This function + * should be called only when the NUMA resource is present. + */ +static void restore_sheltered_memdev_locked(struct ubmem_resource *ubmem_res) +{ + struct resource *numa_res, *memdev_res; + + numa_res = ubmem_res->res.child; + + memdev_res = ubmem_res->memdev_res_shelter; + while (memdev_res) { + ubmem_res->memdev_res_shelter = memdev_res->sibling; + + memdev_res->sibling = NULL; + WARN_ON(request_resource(numa_res, memdev_res)); + + memdev_res = ubmem_res->memdev_res_shelter; + } +} + +/* + * Take memory device resource under the NUMA resource to be reset and chain them in the sheltered + * list + */ +int lock_save_memdev_descendents(struct ubmem_resource *ubmem_res) +{ + int ret; + struct resource *numa_res, *memdev_res, *next, **shelter_tail; + + if (!ubmem_res->preimport) + return 0; + + mutex_lock(&ubmem_res->mutex); + + numa_res = ubmem_res->res.child; + if (!numa_res) + return 0; + WARN_ON(numa_res->sibling != NULL); + + memdev_res = numa_res->child; + shelter_tail = &ubmem_res->memdev_res_shelter; + while (memdev_res) { + next = memdev_res->sibling; + + ret = release_resource(memdev_res); + if (ret) { + pr_err("failed to remove memdev resource %s: unexpected racing happened.\n", + memdev_res->name ? 
memdev_res->name : "(null)"); + goto out_restore; + } + memdev_res->child = memdev_res->parent = memdev_res->sibling = NULL; + *shelter_tail = memdev_res; + + shelter_tail = &memdev_res->sibling; + memdev_res = next; + } + return 0; + +out_restore: + restore_sheltered_memdev_locked(ubmem_res); + mutex_unlock(&ubmem_res->mutex); + return ret; +} + +void restore_unlock_memdev_descendents(struct ubmem_resource *ubmem_res) +{ + if (!ubmem_res->preimport) + return; + + restore_sheltered_memdev_locked(ubmem_res); + mutex_unlock(&ubmem_res->mutex); +} + +struct resource *setup_memdev_resource(struct ubmem_resource *ubmem_res, phys_addr_t pa, + resource_size_t size, int mem_id) +{ + int ret; + struct resource *memdev_res, *parent; + + memdev_res = kzalloc(sizeof(struct resource), GFP_KERNEL); + if (!memdev_res) + return ERR_PTR(-ENOMEM); + + memdev_res->start = pa; + memdev_res->end = pa + size - 1; + memdev_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + memdev_res->name = kasprintf(GFP_KERNEL, MEMID_IORES_PREFIX "%d", mem_id); + if (!memdev_res->name) { + ret = -ENOMEM; + goto err_free_res; + } + + /* Be a descendent of the UBMEM resource */ + parent = &ubmem_res->res; + mutex_lock(&ubmem_res->mutex); + + /* if NUMA resource is present, make itself a child of the NUMA resource */ + if (parent->child) + parent = parent->child; + + ret = request_resource(parent, memdev_res); + if (ret) { + pr_err("failed to request resource under parent %s, ret=%pe.\n", parent->name, + ERR_PTR(ret)); + goto err_unlock; + } + + mutex_unlock(&ubmem_res->mutex); + return memdev_res; + +err_unlock: + mutex_unlock(&ubmem_res->mutex); + kfree(memdev_res->name); +err_free_res: + kfree(memdev_res); + return ERR_PTR(ret); +} + +int release_memdev_resource(struct ubmem_resource *ubmem_res, struct resource *memdev_res) +{ + int ret; + + mutex_lock(&ubmem_res->mutex); + ret = release_resource(memdev_res); + mutex_unlock(&ubmem_res->mutex); + + if (ret) + return ret; + kfree(memdev_res->name); + kfree(memdev_res); + return 0; +} diff --git a/drivers/ub/obmm/obmm_resource.h b/drivers/ub/obmm/obmm_resource.h new file mode 100644 index 000000000000..067ac944e785 --- /dev/null +++ b/drivers/ub/obmm/obmm_resource.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright(c) Huawei Technologies Co., Ltd. 2025 All rights reserved. + * Description: OBMM Framework's implementations. + */ +#ifndef OBMM_RESOURCE_H +#define OBMM_RESOURCE_H + +#include + +struct ubmem_resource; + +struct ubmem_resource *setup_ubmem_resource(phys_addr_t pa, resource_size_t size, bool preimport); +int release_ubmem_resource(struct ubmem_resource *ubmem_res); +int lock_save_memdev_descendents(struct ubmem_resource *ubmem_res); +void restore_unlock_memdev_descendents(struct ubmem_resource *ubmem_res); + +struct resource *setup_memdev_resource(struct ubmem_resource *ubmem_res, phys_addr_t pa, + resource_size_t size, int mem_id); +int release_memdev_resource(struct ubmem_resource *ubmem_res, struct resource *memdev_res); + +#endif -- Gitee From 1b5eadff3cdd53fd87326807e28eedc724c85468 Mon Sep 17 00:00:00 2001 From: shixuantong Date: Sat, 29 Nov 2025 11:46:11 +0800 Subject: [PATCH 24/48] acpi: provides acpi power notifier chain commit 9692e6b35ff40850fffba44a5b6217031d3f54ea openEuler To support BMC poweroff notification under UB architecture, we need to provide notifier chain for driver to register and block poweroff until notifier chain returns success. 
To avoid userspace services being shut down before the poweroff-blocking
procedure runs, the notifier chain should be called before input_report_key(),
which happens earlier than the netlink event.

Signed-off-by: hubin
Signed-off-by: shixuantong
Signed-off-by: shixuantong
---
 arch/arm64/configs/tencent.config |  1 +
 drivers/acpi/Kconfig              |  7 +++++++
 drivers/acpi/button.c             | 23 +++++++++++++++++++++++
 include/acpi/button.h             | 16 ++++++++++++++++
 4 files changed, 47 insertions(+)

diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config
index 32a112ea7b4e..a48e5f9c78f8 100644
--- a/arch/arm64/configs/tencent.config
+++ b/arch/arm64/configs/tencent.config
@@ -117,6 +117,7 @@ CONFIG_ACPI_APEI_MEMORY_FAILURE=y
 CONFIG_ACPI_APEI_EINJ=m
 CONFIG_ACPI_APEI_ERST_DEBUG=m
 CONFIG_ACPI_PFRUT=m
+CONFIG_ACPI_POWER_NOTIFIER_CHAIN=y
 CONFIG_ACPI_AGDI=y
 CONFIG_HAVE_KVM_PINNED_VMID=y
 CONFIG_VIRTUALIZATION=y
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 648228831f5e..fdae63680975 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -181,6 +181,13 @@ config ACPI_BUTTON
 	  To compile this driver as a module, choose M here:
 	  the module will be called button.
 
+config ACPI_POWER_NOTIFIER_CHAIN
+	bool "enable acpi power notifier chain"
+	depends on ACPI_BUTTON && ARM64
+	default n
+	help
+	  Say Y here to enable the acpi power notifier chain.
+
 config ACPI_TINY_POWER_BUTTON
 	tristate "Tiny Power Button Driver"
 	depends on !ACPI_BUTTON
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index c760e38df981..2394ad368d6b 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -436,6 +436,22 @@ static void acpi_lid_notify(acpi_handle handle, u32 event, void *data)
 	acpi_lid_update_state(device, true);
 }
 
+#if IS_ENABLED(CONFIG_ACPI_POWER_NOTIFIER_CHAIN)
+static BLOCKING_NOTIFIER_HEAD(acpi_power_chain_head);
+
+int register_acpi_power_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&acpi_power_chain_head, nb);
+}
+EXPORT_SYMBOL(register_acpi_power_notifier);
+
+int unregister_acpi_power_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&acpi_power_chain_head, nb);
+}
+EXPORT_SYMBOL(unregister_acpi_power_notifier);
+#endif
+
 static void acpi_button_notify(acpi_handle handle, u32 event, void *data)
 {
 	struct acpi_device *device = data;
@@ -449,6 +465,13 @@ static void acpi_button_notify(acpi_handle handle, u32 event, void *data)
 		return;
 	}
 
+#if IS_ENABLED(CONFIG_ACPI_POWER_NOTIFIER_CHAIN)
+	if (blocking_notifier_call_chain(&acpi_power_chain_head, 0, 0) == NOTIFY_BAD) {
+		pr_info("acpi power notifier chain: received bad result, stopping poweroff\n");
+		return;
+	}
+#endif
+
 	acpi_pm_wakeup_event(&device->dev);
 
 	button = acpi_driver_data(device);
diff --git a/include/acpi/button.h b/include/acpi/button.h
index af2fce5d2ee3..789ed88d0d57 100644
--- a/include/acpi/button.h
+++ b/include/acpi/button.h
@@ -2,6 +2,8 @@
 #ifndef ACPI_BUTTON_H
 #define ACPI_BUTTON_H
 
+#include <linux/notifier.h>
+
 #define ACPI_BUTTON_HID_POWER	"PNP0C0C"
 #define ACPI_BUTTON_HID_LID	"PNP0C0D"
 #define ACPI_BUTTON_HID_SLEEP	"PNP0C0E"
@@ -15,4 +17,18 @@ static inline int acpi_lid_open(void)
 }
 #endif /* IS_ENABLED(CONFIG_ACPI_BUTTON) */
 
+#if IS_ENABLED(CONFIG_ACPI_POWER_NOTIFIER_CHAIN)
+int register_acpi_power_notifier(struct notifier_block *nb);
+int unregister_acpi_power_notifier(struct notifier_block *nb);
+#else
+static inline int register_acpi_power_notifier(struct notifier_block *nb)
+{
+	return -EINVAL;
+}
+static inline int unregister_acpi_power_notifier(struct notifier_block *nb)
+{
+	return -EINVAL;
+}
+#endif /* IS_ENABLED(CONFIG_ACPI_POWER_NOTIFIER_CHAIN) */
+
 #endif /* ACPI_BUTTON_H */
--
Gitee

From 39df1d432e06e4d33ef477f1750666968e3eccf8 Mon Sep 17 00:00:00 2001
From: shixuantong
Date: Mon, 1 Dec 2025 16:27:06 +0800
Subject: [PATCH 25/48] drivers/ub/sentry: add sentry_reporter and sentry_msg_help

commit 1e3e4971f8d7c3b195b43b4cb46ae00d079ba466 openEuler

Listens to kernel events (e.g. OOM) and provides a device / API to send
messages. Supports reporting the following abnormal events:
1. report OOM event
2. report power off event
3. report UB mem err event and send a SIGBUS signal to the process that
   is accessing the faulty memory.

Signed-off-by: guodashun
Signed-off-by: shixuantong
Signed-off-by: shixuantong
---
 arch/arm64/configs/tencent.config   |   4 +
 drivers/ub/Kconfig                  |   1 +
 drivers/ub/Makefile                 |   1 +
 drivers/ub/sentry/Kconfig           |  10 +
 drivers/ub/sentry/Makefile          |   8 +
 drivers/ub/sentry/sentry_reporter.c | 597 ++++++++++++++++++++++++++++
 drivers/ub/sentry/smh_common_type.h |  78 ++++
 drivers/ub/sentry/smh_core.c        | 152 +++++++
 drivers/ub/sentry/smh_message.c     | 380 ++++++++++++++++++
 drivers/ub/sentry/smh_message.h     |  24 ++
 10 files changed, 1255 insertions(+)
 create mode 100644 drivers/ub/sentry/Kconfig
 create mode 100644 drivers/ub/sentry/Makefile
 create mode 100644 drivers/ub/sentry/sentry_reporter.c
 create mode 100644 drivers/ub/sentry/smh_common_type.h
 create mode 100644 drivers/ub/sentry/smh_core.c
 create mode 100644 drivers/ub/sentry/smh_message.c
 create mode 100644 drivers/ub/sentry/smh_message.h

diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config
index a48e5f9c78f8..f99a133749e3 100644
--- a/arch/arm64/configs/tencent.config
+++ b/arch/arm64/configs/tencent.config
@@ -1847,6 +1847,10 @@ CONFIG_UB_UMMU_BYPASSDEV=y
 
 CONFIG_OBMM=m
 
+# UB sentry
+CONFIG_UB_SENTRY=m
+# end of UB sentry
+
 # URMA
 CONFIG_UB_URMA=m
 
diff --git a/drivers/ub/Kconfig b/drivers/ub/Kconfig
index 9321fefab35d..0b7cb0ef16cf 100644
--- a/drivers/ub/Kconfig
+++ b/drivers/ub/Kconfig
@@ -17,6 +17,7 @@ source "drivers/ub/ubus/Kconfig"
 source "drivers/ub/ubfi/Kconfig"
 source "drivers/ub/ubase/Kconfig"
 source "drivers/ub/obmm/Kconfig"
+source "drivers/ub/sentry/Kconfig"
 config UB_URMA
 	tristate "Unified Bus (UB) urma support"
 	default m
diff --git a/drivers/ub/Makefile b/drivers/ub/Makefile
index a28b0c0e7ce5..d1dd2267abe0 100644
--- a/drivers/ub/Makefile
+++ b/drivers/ub/Makefile
@@ -5,3 +5,4 @@ obj-y += ubfi/
 obj-$(CONFIG_UB_URMA) += urma/
 obj-$(CONFIG_UB_UBASE) += ubase/
 obj-y += obmm/
+obj-$(CONFIG_UB_SENTRY) += sentry/
diff --git a/drivers/ub/sentry/Kconfig b/drivers/ub/sentry/Kconfig
new file mode 100644
index 000000000000..b7501b0f0c51
--- /dev/null
+++ b/drivers/ub/sentry/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config UB_SENTRY
+	tristate "sentry message report"
+	depends on UB && ACPI_POWER_NOTIFIER_CHAIN
+	default m
+	help
+	  Listens to kernel events (e.g. OOM) and sends sentry messages to userspace.
+	  Provides a device for userspace to read kernel messages and reply with an ack.
+	  Provides a kernel API to send a message to userspace and wait for the result.
diff --git a/drivers/ub/sentry/Makefile b/drivers/ub/sentry/Makefile
new file mode 100644
index 000000000000..610582ab3b90
--- /dev/null
+++ b/drivers/ub/sentry/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the sentry drivers.
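+#
+# sentry_msg_helper provides the message device and the send/ack plumbing
+# (smh_core.o, smh_message.o); sentry_reporter hooks the kernel event
+# notifiers and reports through it.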
+ +obj-$(CONFIG_UB_SENTRY) += sentry_msg_helper.o +obj-$(CONFIG_UB_SENTRY) += sentry_reporter.o + +sentry_msg_helper-y := smh_core.o smh_message.o diff --git a/drivers/ub/sentry/sentry_reporter.c b/drivers/ub/sentry/sentry_reporter.c new file mode 100644 index 000000000000..76ebb3d86b6f --- /dev/null +++ b/drivers/ub/sentry/sentry_reporter.c @@ -0,0 +1,597 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * + * Description: report oom and reboot event to userspace + * Author: Luckky + * Create: 2025-02-17 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smh_message.h" + +#define REBOOT_RESULT_SUCCESS 0 +#define MAX_TIMEOUT 3600000 +#define FD_MODE 0 +#define NUMA_MODE 1 + +static DEFINE_RATELIMIT_STATE(oom_log_rs, HZ, 5); + +static unsigned int reboot_timeout_ms = 30000; +static unsigned int oom_timeout_ms = 30000; +module_param(reboot_timeout_ms, uint, 0444); +module_param(oom_timeout_ms, uint, 0444); + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][reporter]: " fmt + +static struct proc_dir_entry *g_sentry_reporter_proc_dir; + +static int g_ub_mem_fault_with_kill = 1; +static bool g_oom_enable; +static bool g_power_off_enable; +static bool g_ub_mem_fault_enable; + +/** + * check_if_timeout_param_valid - Validate timeout parameters + * + * Return: 0 if valid, negative error code otherwise + */ +static int check_if_timeout_param_valid(void) +{ + if (reboot_timeout_ms > MAX_TIMEOUT) { + pr_err("reboot timeout is out of range! (valid range: [0, %u], current value: %u)\n", + MAX_TIMEOUT, reboot_timeout_ms); + return -EINVAL; + } + + if (oom_timeout_ms > MAX_TIMEOUT) { + pr_err("oom timeout is out of range! 
(valid range: [0, %u], current value: %u)\n", + MAX_TIMEOUT, oom_timeout_ms); + return -EINVAL; + } + + return 0; +} + +/** + * smh_message_retry_send - Send message with retry mechanism + * @msg: Message to send + * @ack: Whether acknowledgment is required + * + * Return: 0 on success, negative error code on failure + */ +static int smh_message_retry_send(struct sentry_msg_helper_msg *msg, bool ack) +{ + int ret; + int i; + int times = msg->timeout_time / MILLISECONDS_OF_EACH_MDELAY; + + msg->start_send_time = ktime_get_ns(); + msg->msgid = smh_get_new_msg_id(); + + for (i = 0; i < times; i++) { + uint64_t cur_time = ktime_get_ns(); + + ret = smh_message_send(msg, ack); + if (!ack) + return ret; + + ret = smh_message_get_ack(msg); + if (ret) + return 0; + + msleep_interruptible(MILLISECONDS_OF_EACH_MDELAY - + (ktime_get_ns() - cur_time) / NSEC_PER_MSEC); + } + + if (msg->type == SMH_MESSAGE_OOM && __ratelimit(&oom_log_rs)) + pr_info("message %llu is timeout\n", msg->msgid); + + return -ETIMEDOUT; +} + +/** + * acpi_power_notifier_callback - ACPI power button notification handler + * @nb: Notifier block + * @action: Action type + * @data: Callback data + * + * Return: NOTIFY_OK on success, NOTIFY_BAD on failure + */ +static int acpi_power_notifier_callback(struct notifier_block *nb, + unsigned long action, void *data) +{ + int ret; + struct sentry_msg_helper_msg msg; + + if (!g_power_off_enable) + return NOTIFY_OK; + + msg.type = SMH_MESSAGE_POWER_OFF; + msg.timeout_time = reboot_timeout_ms + REPORT_COMM_TIME; + + pr_info("send sentry reboot message\n"); + ret = smh_message_retry_send(&msg, true); + if (ret || msg.res != REBOOT_RESULT_SUCCESS) + return NOTIFY_BAD; + + return NOTIFY_OK; +} + +static struct notifier_block acpi_power_notifier = { + .notifier_call = acpi_power_notifier_callback, + .priority = INT_MAX, +}; + +/** + * lowmem_notifier_callback - Low memory notification handler + * @nb: Notifier block + * @action: Action type + * @parm: Callback data containing reclaim information + * + * Return: NOTIFY_OK + */ +static int lowmem_notifier_callback(struct notifier_block *nb, + unsigned long action, void *parm) +{ + struct reclaim_notify_data *data = parm; + struct sentry_msg_helper_msg msg; + int ret; + int i; + + if (!g_oom_enable) + return NOTIFY_OK; + + if (data->reason > RR_HUGEPAGE_RECLAIM) + return NOTIFY_OK; + + if (__ratelimit(&oom_log_rs)) { + pr_info("got lowmem message. pid=%d sync=%d reason=%d\n", + current->pid, data->sync, data->reason); + } + + for (i = 0; i < OOM_EVENT_MAX_NUMA_NODES; i++) + msg.helper_msg_info.oom_info.nid[i] = -1; + + msg.type = SMH_MESSAGE_OOM; + msg.helper_msg_info.oom_info.nr_nid = data->nr_nid > OOM_EVENT_MAX_NUMA_NODES ? 
+ OOM_EVENT_MAX_NUMA_NODES : data->nr_nid; + for (i = 0; i < msg.helper_msg_info.oom_info.nr_nid; i++) + msg.helper_msg_info.oom_info.nid[i] = data->nid[i]; + + msg.helper_msg_info.oom_info.sync = data->sync; + msg.helper_msg_info.oom_info.timeout = oom_timeout_ms; + msg.helper_msg_info.oom_info.reason = data->reason; + msg.timeout_time = oom_timeout_ms + REPORT_COMM_TIME; + + ret = smh_message_retry_send(&msg, data->sync); + if (ret) + data->nr_freed = 0; + else + data->nr_freed = msg.res; + + return NOTIFY_OK; +} + +static struct notifier_block lowmem_notifier = { + .notifier_call = lowmem_notifier_callback, + .priority = INT_MAX, +}; + +/** + * proc_oom_enable_write - Write handler for oom proc file + * @file: File structure + * @ubuf: User buffer + * @cnt: Number of bytes to write + * @ppos: File position + * + * Return: Number of bytes written on success, negative error code on failure + */ +static ssize_t proc_oom_enable_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char oom_enable[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for oom, the value can only be 'off' or 'on'.\n"); + return -EINVAL; + } + + ret = copy_from_user(oom_enable, ubuf, cnt); + if (ret) { + pr_err("set oom failed\n"); + return -EFAULT; + } + + if (cnt > 0 && oom_enable[cnt - 1] == '\n') + oom_enable[cnt - 1] = '\0'; + + if (strcmp(oom_enable, "on") == 0) { + g_oom_enable = true; + } else if (strcmp(oom_enable, "off") == 0) { + g_oom_enable = false; + } else { + pr_err("invalid value for oom\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_oom_enable_show - Read handler for oom proc file + * @file: File structure + * @buf: User buffer + * @count: Number of bytes to read + * @ppos: File position + * + * Return: Number of bytes read on success, negative error code on failure + */ +static ssize_t proc_oom_enable_show(struct file *file, + char __user *buf, + size_t count, loff_t *ppos) +{ + const char *value = g_oom_enable ? "on" : "off"; + size_t len = g_oom_enable ? 
2 : 3; + + return simple_read_from_buffer(buf, count, ppos, value, len); +} + +static const struct proc_ops proc_oom_file_operations = { + .proc_read = proc_oom_enable_show, + .proc_write = proc_oom_enable_write, +}; + +/** + * proc_power_off_enable_write - Write handler for power_off proc file + * @file: File structure + * @ubuf: User buffer + * @cnt: Number of bytes to write + * @ppos: File position + * + * Return: Number of bytes written on success, negative error code on failure + */ +static ssize_t proc_power_off_enable_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char power_off_enable[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for power_off, the value can only be 'off' or 'on'.\n"); + return -EINVAL; + } + + ret = copy_from_user(power_off_enable, ubuf, cnt); + if (ret) { + pr_err("set power_off failed\n"); + return -EFAULT; + } + + if (cnt > 0 && power_off_enable[cnt - 1] == '\n') + power_off_enable[cnt - 1] = '\0'; + + if (strcmp(power_off_enable, "on") == 0) { + g_power_off_enable = true; + } else if (strcmp(power_off_enable, "off") == 0) { + g_power_off_enable = false; + } else { + pr_err("invalid value for power_off\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_power_off_enable_show - Read handler for power_off proc file + * @file: File structure + * @buf: User buffer + * @count: Number of bytes to read + * @ppos: File position + * + * Return: Number of bytes read on success, negative error code on failure + */ +static ssize_t proc_power_off_enable_show(struct file *file, + char __user *buf, + size_t count, loff_t *ppos) +{ + const char *value = g_power_off_enable ? "on" : "off"; + size_t len = g_power_off_enable ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, value, len); +} + +static const struct proc_ops proc_power_off_enable_file_operations = { + .proc_read = proc_power_off_enable_show, + .proc_write = proc_power_off_enable_write, +}; + +/** + * proc_ub_mem_fault_enable_write - Write handler for ub_mem_fault proc file + * @file: File structure + * @ubuf: User buffer + * @cnt: Number of bytes to write + * @ppos: File position + * + * Return: Number of bytes written on success, negative error code on failure + */ +static ssize_t proc_ub_mem_fault_enable_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char ub_mem_fault_enable[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for ub_mem_fault, the value can only be 'off' or 'on'.\n"); + return -EINVAL; + } + + ret = copy_from_user(ub_mem_fault_enable, ubuf, cnt); + if (ret) { + pr_err("set ub_mem_fault failed\n"); + return -EFAULT; + } + + if (cnt > 0 && ub_mem_fault_enable[cnt - 1] == '\n') + ub_mem_fault_enable[cnt - 1] = '\0'; + + if (strcmp(ub_mem_fault_enable, "on") == 0) { + g_ub_mem_fault_enable = true; + } else if (strcmp(ub_mem_fault_enable, "off") == 0) { + g_ub_mem_fault_enable = false; + } else { + pr_err("invalid value for ub_mem_fault\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_ub_mem_fault_enable_show - Read handler for ub_mem_fault proc file + * @file: File structure + * @buf: User buffer + * @count: Number of bytes to read + * @ppos: File position + * + * Return: Number of bytes read on success, negative error code on failure + */ +static ssize_t proc_ub_mem_fault_enable_show(struct file *file, + char __user *buf, + size_t count, loff_t *ppos) +{ + const char *value = 
g_ub_mem_fault_enable ? "on" : "off"; + size_t len = g_ub_mem_fault_enable ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, value, len); +} + +static const struct proc_ops proc_ub_mem_fault_enable_file_operations = { + .proc_read = proc_ub_mem_fault_enable_show, + .proc_write = proc_ub_mem_fault_enable_write, +}; + +/** + * proc_ub_mem_fault_with_kill_write - Write handler for ub_mem_fault_with_kill proc file + * @file: File structure + * @ubuf: User buffer + * @cnt: Number of bytes to write + * @ppos: File position + * + * Return: Number of bytes written on success, negative error code on failure + */ +static ssize_t proc_ub_mem_fault_with_kill_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char ub_mem_fault_with_kill[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for ub_mem_fault_with_kill, the value can only be 'off' or 'on'.\n"); + return -EINVAL; + } + + ret = copy_from_user(ub_mem_fault_with_kill, ubuf, cnt); + if (ret) { + pr_err("set ub_mem_fault_with_kill failed\n"); + return -EFAULT; + } + + if (cnt > 0 && ub_mem_fault_with_kill[cnt - 1] == '\n') + ub_mem_fault_with_kill[cnt - 1] = '\0'; + + if (strcmp(ub_mem_fault_with_kill, "on") == 0) { + g_ub_mem_fault_with_kill = 1; + } else if (strcmp(ub_mem_fault_with_kill, "off") == 0) { + g_ub_mem_fault_with_kill = 0; + } else { + pr_err("invalid value for ub_mem_fault_with_kill\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_ub_mem_fault_with_kill_show - Read handler for ub_mem_fault_with_kill proc file + * @file: File structure + * @buf: User buffer + * @count: Number of bytes to read + * @ppos: File position + * + * Return: Number of bytes read on success, negative error code on failure + */ +static ssize_t proc_ub_mem_fault_with_kill_show(struct file *file, + char __user *buf, + size_t count, loff_t *ppos) +{ + const char *value = g_ub_mem_fault_with_kill ? "on" : "off"; + size_t len = g_ub_mem_fault_with_kill ? 
2 : 3; + + return simple_read_from_buffer(buf, count, ppos, value, len); +} + +static const struct proc_ops proc_ub_mem_fault_with_kill_file_operations = { + .proc_read = proc_ub_mem_fault_with_kill_show, + .proc_write = proc_ub_mem_fault_with_kill_write, +}; + +/** + * ub_mem_ras_handler - UB memory RAS error handler + * @phys_addr: Physical address of the error + * @err_type: Error type + * + * Return: 0 on success + */ +static int ub_mem_ras_handler(uint64_t phys_addr, enum ras_err_type err_type) +{ + struct sentry_msg_helper_msg msg; + struct page *page; + int ret; + + if (!g_ub_mem_fault_enable) + return NOTIFY_OK; + + pr_info("ub mem error: type=%d\n", err_type); + + msg.helper_msg_info.ub_mem_info.pa = phys_addr; + msg.helper_msg_info.ub_mem_info.raw_ubus_mem_err_type = err_type; + msg.msgid = smh_get_new_msg_id(); + msg.type = SMH_MESSAGE_UB_MEM_ERR; + msg.start_send_time = ktime_get_ns(); + msg.timeout_time = ULLONG_MAX; + + if ((err_type == REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR || + err_type == UB_MEM_READ_DATA_ERR || + err_type == UB_MEM_FLOW_POISON || + err_type == UB_MEM_READ_DATA_POISON || + err_type == UB_MEM_READ_DATA_RESPERR) && g_ub_mem_fault_with_kill) { + msg.helper_msg_info.ub_mem_info.fault_with_kill = 1; + } else { + msg.helper_msg_info.ub_mem_info.fault_with_kill = 0; + } + + /* Check mode (FD or NUMA) */ + page = pfn_to_online_page(PHYS_PFN(phys_addr)); + + if (!page) { + /* FD mode */ + msg.helper_msg_info.ub_mem_info.mem_type = FD_MODE; + pr_info("ub mem error: mem mode is fd mode\n"); + } else { + /* NUMA mode */ + msg.helper_msg_info.ub_mem_info.mem_type = NUMA_MODE; + pr_info("ub mem error: mem mode is numa mode\n"); + if (msg.helper_msg_info.ub_mem_info.fault_with_kill) + memory_failure_queue(PHYS_PFN(phys_addr), 0); + } + + ret = smh_message_send(&msg, false); + if (ret) + pr_err("Failed to send remote message to userspace. %d\n", ret); + + return 0; +} + +/** + * sentry_reporter_init - Module initialization function + * + * Return: 0 on success, negative error code on failure + */ +static int __init sentry_reporter_init(void) +{ + int ret; + + ret = check_if_timeout_param_valid(); + if (ret) + return ret; + + g_sentry_reporter_proc_dir = proc_mkdir_mode("sentry_reporter", + PROC_DIR_PERMISSION, NULL); + if (!g_sentry_reporter_proc_dir) { + pr_err("create /proc/sentry_reporter dir failed\n"); + return -ENOMEM; + } + + ret = sentry_create_proc_file("ub_mem_fault_with_kill", + g_sentry_reporter_proc_dir, + &proc_ub_mem_fault_with_kill_file_operations); + ret |= sentry_create_proc_file("oom", + g_sentry_reporter_proc_dir, + &proc_oom_file_operations); + ret |= sentry_create_proc_file("power_off", + g_sentry_reporter_proc_dir, + &proc_power_off_enable_file_operations); + ret |= sentry_create_proc_file("ub_mem_fault", + g_sentry_reporter_proc_dir, + &proc_ub_mem_fault_enable_file_operations); + if (ret < 0) + goto remove_proc_dir; + + ret = register_acpi_power_notifier(&acpi_power_notifier); + pr_info("power notifier register %s\n", ret ? "failed" : "successful"); + if (ret) + goto remove_proc_dir; + + ret = register_reclaim_notifier(&lowmem_notifier); + pr_info("lowmem notifier register %s\n", ret ? 
"failed" : "successful"); + if (ret) + goto unregister_power_notifier; + + ub_mem_ras_handler_register(ub_mem_ras_handler); + pr_info("ubus notifier register successful\n"); + + return 0; + +unregister_power_notifier: + unregister_acpi_power_notifier(&acpi_power_notifier); + pr_info("power notifier unregistered\n"); +remove_proc_dir: + proc_remove(g_sentry_reporter_proc_dir); + pr_info("proc file removed\n"); + return ret; +} + +/** + * sentry_reporter_exit - Module cleanup function + */ +static void __exit sentry_reporter_exit(void) +{ + unregister_acpi_power_notifier(&acpi_power_notifier); + pr_info("power notifier unregistered\n"); + + unregister_reclaim_notifier(&lowmem_notifier); + pr_info("lowmem notifier unregistered\n"); + + ub_mem_ras_handler_unregister(); + pr_info("ub_mem notifier unregistered\n"); + + proc_remove(g_sentry_reporter_proc_dir); + pr_info("proc file removed\n"); +} + +module_init(sentry_reporter_init); +module_exit(sentry_reporter_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Luckky"); +MODULE_DESCRIPTION("sentry reporter: report kernel events to userspace"); +MODULE_VERSION("1.0"); diff --git a/drivers/ub/sentry/smh_common_type.h b/drivers/ub/sentry/smh_common_type.h new file mode 100644 index 000000000000..9ae54a2b0a43 --- /dev/null +++ b/drivers/ub/sentry/smh_common_type.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description: Common Header File for Sentry Module + * Author: Luckky + * Create: 2025-02-17 + */ + +#ifndef SMH_COMMON_TYPE_H +#define SMH_COMMON_TYPE_H + +#include +#include +#include +#include +#include + +#define SMH_TYPE ('}') +#define OOM_EVENT_MAX_NUMA_NODES 8 +#define REPORT_COMM_TIME 5000 +#define MILLISECONDS_OF_EACH_MDELAY 1000 +#define ENABLE_VALUE_MAX_LEN 4 // 'off' + '\0' + +#define URMA_REBUILD_THRESHOLD 3 +#define URMA_ACK_RETRY_NUM 10 + +#define PROC_FILE_PERMISSION 0600 +#define PROC_DIR_PERMISSION 0550 + +enum { + SMH_CMD_MSG_ACK = 0x10, +}; + +#define SMH_MSG_ACK _IO(SMH_TYPE, SMH_CMD_MSG_ACK) + +enum sentry_msg_helper_msg_type { + SMH_MESSAGE_POWER_OFF, + SMH_MESSAGE_OOM, + SMH_MESSAGE_UB_MEM_ERR, + SMH_MESSAGE_UNKNOWN, +}; + +struct sentry_msg_helper_msg { + enum sentry_msg_helper_msg_type type; + uint64_t msgid; + uint64_t start_send_time; + uint64_t timeout_time; + // reboot_info is empty + union { + struct { + int nr_nid; + int nid[OOM_EVENT_MAX_NUMA_NODES]; + int sync; + int timeout; + int reason; + } oom_info; + struct { + uint64_t pa; + int mem_type; + int fault_with_kill; + enum ras_err_type raw_ubus_mem_err_type; + } ub_mem_info; + } helper_msg_info; + unsigned long res; +}; + +static inline int sentry_create_proc_file(const char *name, struct proc_dir_entry *parent, + const struct proc_ops *proc_ops) +{ + int ret = 0; + + if (!proc_create(name, PROC_FILE_PERMISSION, parent, proc_ops)) { + pr_err("create proc file %s failed.\n", name); + ret = -ENOMEM; + } + return ret; +} +#endif diff --git a/drivers/ub/sentry/smh_core.c b/drivers/ub/sentry/smh_core.c new file mode 100644 index 000000000000..61103c551228 --- /dev/null +++ b/drivers/ub/sentry/smh_core.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ * + * Description: Sentry Msg Helper + * Author: Luckky + * Create: 2025-02-17 + */ + +#include +#include +#include +#include +#include + +#include "smh_message.h" + +#define SMH_DEV_NAME "sentry_msg_helper" + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][message_helper]: " fmt + +/** + * smh_dev_ioctl - IOCTL handler for sentry message helper device + * @file: File structure pointer + * @cmd: IOCTL command + * @arg: User space argument + * + * Return: 0 on success, negative error code on failure + */ +static long smh_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret = 0; + + switch (cmd) { + case SMH_MSG_ACK: { + struct sentry_msg_helper_msg cmd_msg; + + if (copy_from_user(&cmd_msg, (void __user *)arg, sizeof(cmd_msg))) + return -EFAULT; + + ret = smh_message_ack(&cmd_msg); + break; + } + default: + ret = -EINVAL; + } + + return ret; +} + +/** + * smh_dev_open - Open handler for sentry message helper device + * @inode: Inode structure pointer + * @file: File structure pointer + * + * Return: 0 on success + */ +static int smh_dev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +/** + * smh_dev_flush - Flush handler for sentry message helper device + * @file: File structure pointer + * @owner: File owner ID + * + * Return: 0 on success + */ +static int smh_dev_flush(struct file *file, fl_owner_t owner) +{ + return 0; +} + +/** + * smh_dev_read - Read handler for sentry message helper device + * @filp: File structure pointer + * @buf: User space buffer + * @count: Number of bytes to read + * @f_pos: File position pointer + * + * Return: Number of bytes read on success, zero or negative error code on failure + */ +static ssize_t smh_dev_read(struct file *filp, char __user *buf, size_t count, + loff_t *f_pos) +{ + if (count != sizeof(struct sentry_msg_helper_msg)) { + pr_err("smh_dev_read: read size mismatch\n"); + return 0; + } + + return smh_message_get(buf); +} + +static const struct file_operations smh_dev_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = smh_dev_ioctl, + .open = smh_dev_open, + .read = smh_dev_read, + .flush = smh_dev_flush, +}; + +static struct miscdevice smh_dev_handle = { + .minor = MISC_DYNAMIC_MINOR, + .name = SMH_DEV_NAME, + .fops = &smh_dev_fops, +}; + +/** + * smh_init - Module initialization function + * + * Return: 0 on success, negative error code on failure + */ +static int __init smh_init(void) +{ + int ret; + + ret = smh_message_init(); + if (ret) { + pr_err("Failed to init smh message. retval=%d\n", ret); + return ret; + } + pr_info("smh init successfully.\n"); + + ret = misc_register(&smh_dev_handle); + if (ret) { + pr_err("Failed to register smh device. retval=%d\n", ret); + smh_message_exit(); + return ret; + } + pr_info("smh device registered successfully.\n"); + + return 0; +} + +/** + * smh_exit - Module cleanup function + */ +static void __exit smh_exit(void) +{ + misc_deregister(&smh_dev_handle); + smh_message_exit(); +} + +module_init(smh_init); +module_exit(smh_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Luckky"); +MODULE_DESCRIPTION("SMH: Sentry Msg Helper"); +MODULE_VERSION("1.0"); diff --git a/drivers/ub/sentry/smh_message.c b/drivers/ub/sentry/smh_message.c new file mode 100644 index 000000000000..971e659244d4 --- /dev/null +++ b/drivers/ub/sentry/smh_message.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
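+ *
+ * Kernel-side flow (sketch, using only functions defined in this file;
+ * the timeout value is an example):
+ *
+ *	struct sentry_msg_helper_msg m = { .type = SMH_MESSAGE_OOM };
+ *
+ *	m.msgid = smh_get_new_msg_id();
+ *	m.start_send_time = ktime_get_ns();
+ *	m.timeout_time = 30000;		// in milliseconds
+ *	smh_message_send(&m, true);	// queue for userspace
+ *	// ... userspace reads the message and acks it ...
+ *	if (smh_message_get_ack(&m))	// returns 1 once acked
+ *		handle_result(m.res);	// hypothetical consumer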
+ *
+ * Description: Sentry Msg Helper
+ * Author: Luckky
+ * Create: 2025-02-17
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "smh_message.h"
+
+static DEFINE_RATELIMIT_STATE(msg_log_rs, HZ, 10);
+
+#undef pr_fmt
+#define pr_fmt(fmt) "[sentry][message_helper]: " fmt
+
+#define RM_LOG_INFO(fmt, ...) \
+	do { \
+		if (__ratelimit(&msg_log_rs)) { \
+			printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__); \
+		} \
+	} while (0)
+
+
+#define RM_LOG_WARN(fmt, ...) \
+	do { \
+		if (__ratelimit(&msg_log_rs)) { \
+			printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__); \
+		} \
+	} while (0)
+
+#define RM_LOG_ERR(fmt, ...) \
+	do { \
+		if (__ratelimit(&msg_log_rs)) { \
+			printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__); \
+		} \
+	} while (0)
+
+#define SMH_MESSAGE_BUFFER_LENGTH 256
+#define SMH_MESSAGE_BUFFER_MAX_LENGTH 4096
+
+
+static int smh_message_buffer_length = SMH_MESSAGE_BUFFER_LENGTH;
+module_param(smh_message_buffer_length, int, 0444);
+
+/**
+ * FIND_AND_REMOVE_TIMEOUT_FROM_LIST - Macro to find and remove message from list
+ * @handle: Pointer to store found handle
+ * @lock: Spinlock to protect the list
+ * @list_head: List head to search
+ * @member: List member name in the structure
+ * @msgid_target: Target message ID to find
+ * @found: Boolean to indicate if message was found
+ *
+ * This macro searches for a message in the list by ID, removes timed-out
+ * messages along the way, and returns the found message handle.
+ */
+#define FIND_AND_REMOVE_TIMEOUT_FROM_LIST(handle, lock, list_head, member, msgid_target, found) \
+	do { \
+		spin_lock(lock); \
+		{ \
+			typeof(handle) __cur, __tmp; \
+			list_for_each_entry_safe(__cur, __tmp, list_head, member) { \
+				if (check_msg_is_timeout(&__cur->msg)) { \
+					list_del(&__cur->member); \
+					kfree(__cur); \
+					handle = NULL; \
+					continue; \
+				} \
+				if (__cur->msg.msgid == (msgid_target)) { \
+					found = true; \
+					list_del(&__cur->member); \
+					handle = __cur; \
+					break; \
+				} \
+			} \
+		} \
+		spin_unlock(lock); \
+	} while (0)
+
+struct smh_msg_handler {
+	struct sentry_msg_helper_msg msg;
+	bool ack;
+	struct list_head ack_list;
+	struct list_head get_list;
+};
+
+struct smh_msg_ctx {
+	struct kfifo msgbuf_send;
+	spinlock_t msgbuf_send_lock;
+
+	struct list_head msgbuf_ack;
+	spinlock_t msgbuf_ack_lock;
+
+	struct list_head msgbuf_get;
+	spinlock_t msgbuf_get_lock;
+
+	struct wait_queue_head user_wq;
+};
+
+static struct smh_msg_ctx msg_ctx;
+static atomic64_t message_id_generator; /* [1, message_id_generator] */
+
+/**
+ * smh_get_new_msg_id - Generate a new unique message ID
+ *
+ * Return: New message ID
+ */
+uint64_t smh_get_new_msg_id(void)
+{
+	return atomic64_inc_return(&message_id_generator);
+}
+EXPORT_SYMBOL(smh_get_new_msg_id);
+
+/**
+ * check_msg_is_timeout - Check if message has timed out
+ * @msg: Message to check
+ *
+ * Return: true if timeout, false otherwise
+ */
+static bool check_msg_is_timeout(struct sentry_msg_helper_msg *msg)
+{
+	uint64_t now = ktime_get_ns();
+	uint64_t interval_time = (now - msg->start_send_time) / NSEC_PER_MSEC;
+
+	return interval_time > msg->timeout_time;
+}
+
+/**
+ * smh_message_send - Send a message through the message helper
+ * @msg: Message to send
+ * @ack: Whether acknowledgment is required
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int smh_message_send(struct sentry_msg_helper_msg *msg, bool ack)
+{
+	int ret = 0;
+	struct smh_msg_handler *handle;
+
+	if (!msg->msgid) {
+		RM_LOG_ERR("msgid not set; get one via 'smh_get_new_msg_id' first, dropping this message\n");
+		return -EINVAL;
+	}
+
+	handle = kzalloc(sizeof(*handle), GFP_ATOMIC);
+	if (!handle) {
+		RM_LOG_ERR("failed to alloc message handle\n");
+		return -ENOMEM;
+	}
+
+	handle->msg = *msg;
+	handle->ack = ack;
+
+	RM_LOG_INFO("smh_message_send: %llu start\n", msg->msgid);
+
+	ret = kfifo_in_spinlocked(&msg_ctx.msgbuf_send, &handle,
+				  sizeof(handle), &msg_ctx.msgbuf_send_lock);
+	if (!ret) {
+		RM_LOG_ERR("error sending message %llu: buffer is full; message dropped\n",
+			   msg->msgid);
+		kfree(handle);
+		return -EAGAIN;
+	}
+
+	/* Check if someone is waiting */
+	if (waitqueue_active(&msg_ctx.user_wq))
+		wake_up(&msg_ctx.user_wq);
+
+	RM_LOG_INFO("smh_message_send: %llu end\n", msg->msgid);
+
+	return 0;
+}
+EXPORT_SYMBOL(smh_message_send);
+
+/**
+ * smh_message_get - Get a message from the message helper
+ * @buf: User space buffer to copy message to
+ *
+ * Return: Number of bytes copied on success, negative error code on failure
+ */
+ssize_t smh_message_get(void __user *buf)
+{
+	int ret;
+	struct smh_msg_handler *handle = NULL;
+	struct smh_msg_handler *handle_ack;
+	DEFINE_WAIT(wait);
+
+	if (waitqueue_active(&msg_ctx.user_wq)) {
+		RM_LOG_WARN("another process is waiting for message\n");
+		return -EPERM;
+	}
+
+	do {
+		ret = kfifo_out_spinlocked(&msg_ctx.msgbuf_send, &handle,
+					   sizeof(handle), &msg_ctx.msgbuf_send_lock);
+		if (ret) {
+			if (check_msg_is_timeout(&handle->msg)) {
+				RM_LOG_INFO("smh_message_get: %llu timeout\n", handle->msg.msgid);
+				kfree(handle);
+				handle = NULL;
+				continue;
+			}
+			break;
+		}
+
+		add_wait_queue_exclusive(&msg_ctx.user_wq, &wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&msg_ctx.user_wq, &wait);
+		if (signal_pending(current)) {
+			RM_LOG_ERR("error reading message: process received a signal\n");
+			return -ERESTART;
+		}
+	} while (1);
+
+	if (!handle)
+		return -ENOMSG;
+
+	RM_LOG_INFO("smh_message_get: get msg, msgid is %llu\n", handle->msg.msgid);
+
+	ret = copy_to_user(buf, &handle->msg, sizeof(handle->msg));
+	if (ret) {
+		RM_LOG_ERR("%s: failed to copy message to user: %d\n", __func__, ret);
+		ret = kfifo_in_spinlocked(&msg_ctx.msgbuf_send, &handle,
+					  sizeof(handle), &msg_ctx.msgbuf_send_lock);
+		if (!ret) {
+			RM_LOG_ERR("failed to requeue message %llu: buffer is full; message dropped\n",
+				   handle->msg.msgid);
+			kfree(handle);
+			return -EFAULT;
+		}
+		return -EAGAIN;
+	}
+
+	if (handle->ack) {
+		bool found = false;
+
+		spin_lock(&msg_ctx.msgbuf_ack_lock);
+		list_for_each_entry(handle_ack, &msg_ctx.msgbuf_ack, ack_list) {
+			if (handle_ack->msg.msgid == handle->msg.msgid) {
+				found = true;
+				break;
+			}
+		}
+		if (!found)
+			list_add_tail(&handle->ack_list, &msg_ctx.msgbuf_ack);
+		spin_unlock(&msg_ctx.msgbuf_ack_lock);
+	} else {
+		kfree(handle);
+	}
+
+	return sizeof(handle->msg);
+}
+
+/**
+ * smh_message_ack - Acknowledge a message
+ * @msg: Message to acknowledge
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int smh_message_ack(struct sentry_msg_helper_msg *msg)
+{
+	struct smh_msg_handler *handle;
+	bool found = false;
+
+	RM_LOG_INFO("smh_message_ack: %llu\n", msg->msgid);
+
+	FIND_AND_REMOVE_TIMEOUT_FROM_LIST(handle, &msg_ctx.msgbuf_ack_lock,
+					  &msg_ctx.msgbuf_ack, ack_list,
+					  msg->msgid, found);
+
+	if (!found) {
+		RM_LOG_ERR("smh_message_ack: %llu not found; the message may not exist or may have timed out\n",
+			   msg->msgid);
+		return -ENOENT;
+	}
+
+	handle->msg.res = msg->res;
+
+	spin_lock(&msg_ctx.msgbuf_get_lock);
+	list_add_tail(&handle->get_list, &msg_ctx.msgbuf_get);
+	spin_unlock(&msg_ctx.msgbuf_get_lock);
+
+	return 0;
+}
+
+/**
+ * smh_message_get_ack - Get acknowledgment for a message
+ * @msg: Message to get acknowledgment for
+ *
+ * Return: 1 if acknowledgment found, 0 otherwise
+ */
+int smh_message_get_ack(struct sentry_msg_helper_msg *msg)
+{
+	struct smh_msg_handler *handle;
+	bool found = false;
+
+	FIND_AND_REMOVE_TIMEOUT_FROM_LIST(handle, &msg_ctx.msgbuf_get_lock,
+					  &msg_ctx.msgbuf_get, get_list,
+					  msg->msgid, found);
+
+	if (found) {
+		msg->res = handle->msg.res;
+		kfree(handle);
+	}
+
+	return found;
+}
+EXPORT_SYMBOL(smh_message_get_ack);
+
+/**
+ * smh_message_init - Initialize the message helper subsystem
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int smh_message_init(void)
+{
+	int ret;
+
+	if (smh_message_buffer_length <= 0 ||
+	    smh_message_buffer_length > SMH_MESSAGE_BUFFER_MAX_LENGTH) {
+		RM_LOG_ERR("invalid smh_message_buffer_length\n");
+		return -EINVAL;
+	}
+
+	ret = kfifo_alloc(&msg_ctx.msgbuf_send,
+			  sizeof(struct smh_msg_handler *) * smh_message_buffer_length,
+			  GFP_KERNEL);
+	if (ret < 0) {
+		RM_LOG_ERR("error allocating send message buffer: %d\n", ret);
+		return ret;
+	}
+	spin_lock_init(&msg_ctx.msgbuf_send_lock);
+
+	INIT_LIST_HEAD(&msg_ctx.msgbuf_ack);
+	spin_lock_init(&msg_ctx.msgbuf_ack_lock);
+
+	INIT_LIST_HEAD(&msg_ctx.msgbuf_get);
+	spin_lock_init(&msg_ctx.msgbuf_get_lock);
+
+	init_waitqueue_head(&msg_ctx.user_wq);
+	atomic64_set(&message_id_generator, 0);
+
+	return 0;
+}
+
+/**
+ * smh_message_exit - Cleanup the message helper subsystem
+ */
+void smh_message_exit(void)
+{
+	struct smh_msg_handler *handle, *tmp;
+
+	/* Clean up acknowledgment list */
+	spin_lock(&msg_ctx.msgbuf_ack_lock);
+	list_for_each_entry_safe(handle, tmp, &msg_ctx.msgbuf_ack, ack_list) {
+		list_del(&handle->ack_list);
+		kfree(handle);
+	}
+	spin_unlock(&msg_ctx.msgbuf_ack_lock);
+
+	/* Clean up get list */
+	spin_lock(&msg_ctx.msgbuf_get_lock);
+	list_for_each_entry_safe(handle, tmp, &msg_ctx.msgbuf_get, get_list) {
+		list_del(&handle->get_list);
+		kfree(handle);
+	}
+	spin_unlock(&msg_ctx.msgbuf_get_lock);
+
+	kfifo_free(&msg_ctx.msgbuf_send);
+}
diff --git a/drivers/ub/sentry/smh_message.h b/drivers/ub/sentry/smh_message.h
new file mode 100644
index 000000000000..f9bac3ab6a35
--- /dev/null
+++ b/drivers/ub/sentry/smh_message.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * Description: Header File for Sentry Msg Helper
+ * Author: Luckky
+ * Create: 2025-02-17
+ */
+
+#ifndef SMH_MESSAGE_H
+#define SMH_MESSAGE_H
+
+#include
+#include "smh_common_type.h"
+
+uint64_t smh_get_new_msg_id(void);
+int smh_message_send(struct sentry_msg_helper_msg *msg, bool ack);
+ssize_t smh_message_get(void __user *buf);
+int smh_message_ack(struct sentry_msg_helper_msg *msg);
+int smh_message_get_ack(struct sentry_msg_helper_msg *msg);
+
+int smh_message_init(void);
+void smh_message_exit(void);
+
+#endif
--
Gitee

From f17099960c08d3c7c885ef0d071d1344989837c1 Mon Sep 17 00:00:00 2001
From: shixuantong
Date: Tue, 11 Nov 2025 14:53:08 +0800
Subject: [PATCH 26/48] drivers/ub/sentry: add sentry_urma_comm module

commit be8404a74f33ca6edee769d7887e1cce31fa314c openEuler

Provides URMA communication functionality; it depends on the kernel and
umdk.
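An expected consumer flow, sketched from the symbols this module exports
(the EID string is a placeholder for a full-length EID):

	union ubcore_eid eid[MAX_DIE_NUM] = {};

	if (!str_to_eid("...", &eid[0]))	/* placeholder EID string */
		sentry_create_urma_resource(eid, 1);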
Signed-off-by: guodashun Signed-off-by: shixuantong Signed-off-by: shixuantong --- drivers/ub/sentry/Kconfig | 7 + drivers/ub/sentry/Makefile | 1 + drivers/ub/sentry/sentry_urma_comm.c | 2288 ++++++++++++++++++++++++++ drivers/ub/sentry/smh_common_type.h | 7 + 4 files changed, 2303 insertions(+) create mode 100644 drivers/ub/sentry/sentry_urma_comm.c diff --git a/drivers/ub/sentry/Kconfig b/drivers/ub/sentry/Kconfig index b7501b0f0c51..9b2fad6d291a 100644 --- a/drivers/ub/sentry/Kconfig +++ b/drivers/ub/sentry/Kconfig @@ -8,3 +8,10 @@ config UB_SENTRY Listens to kernel event(eg. oom) and send sentry msg to userspace Provides device for userspace to read kernel message and reply ack Provides kernel api to send message to userspace and wait for result + +config UB_SENTRY_REMOTE + tristate "sentry remote event reporter module" + depends on UB_SENTRY && UB_URMA + default m + help + report panic/reboot event msg diff --git a/drivers/ub/sentry/Makefile b/drivers/ub/sentry/Makefile index 610582ab3b90..f4d4464a552b 100644 --- a/drivers/ub/sentry/Makefile +++ b/drivers/ub/sentry/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_UB_SENTRY) += sentry_msg_helper.o obj-$(CONFIG_UB_SENTRY) += sentry_reporter.o +obj-$(CONFIG_UB_SENTRY_REMOTE) += sentry_urma_comm.o sentry_msg_helper-y := smh_core.o smh_message.o diff --git a/drivers/ub/sentry/sentry_urma_comm.c b/drivers/ub/sentry/sentry_urma_comm.c new file mode 100644 index 000000000000..21ad57952e84 --- /dev/null +++ b/drivers/ub/sentry/sentry_urma_comm.c @@ -0,0 +1,2288 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description: urma communication module + * Author: sxt1001 + * Create: 2025-03-18 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smh_common_type.h" + +static int heartbeat_thread(void *arg); +static int rebuild_tjetty(int idx, int die_index); +static int sentry_post_jetty_send_wr(const char *buf, size_t len, int tjetty_idx, int die_index); +static int sentry_poll_jfc(struct ubcore_jfc *jfc, int cr_cnt, struct ubcore_cr *cr, int die_index); + +#define PROC_DEVICE_PATH "sentry_urma_comm" +#define PROC_DEVICE_NAME "client_info" +#define PROC_HEARTBEAT_SWITCH "heartbeat" +#define ENABLE_VALUE_MAX_LEN 4 /* 'off' + '\n' */ +#define MAX_JFC_DEPTH 96 +#define MAX_JFR_DEPTH 96 +#define MAX_JFS_DEPTH 96 +#define MAX_SGE 1 +#define MIN_RNR_TIMER 17 /* timeout time is 2^17*4.096usec≈536ms */ +#define SGE_MAX_LEN 4096 +#define DEFAULT_INVALID_JETTY_ID (-1) +#define MIN_JETTY_ID 3 +#define MAX_JETTY_ID 1023 +#define JETTY_ID_MAX_LEN 6 +#define UVS_IPV4_MAP_IPV6_PREFIX 0x0000ffff +#define URMA_CNT_MAX_NUM (1U << 20) +#define HB_WAIT_ACK_SLEEP_MS 3000 +#define HEARTBEAT_INTERVAL_MS 60000 /* 60s */ +#define URMA_LOCK 1 +#define URMA_UNLOCK 0 +#define EID_PART_NUM 8 +#define CLIENT_INFO_MAX_LEN (((EID_MAX_LEN + 1) * MAX_NODE_NUM - 1) * 2 + 1 + 1 + \ + JETTY_ID_MAX_LEN + 1) +/* The maximum length of the server_eid content in client info */ +#define SERVER_EID_PART_MAX_LEN (((EID_MAX_LEN + 1) * MAX_NODE_NUM - 1) * 2 + 1 + 1) +#define SINGLE_SERVER_PART_LEN ((EID_MAX_LEN + 1) * MAX_NODE_NUM - 1 + 1) + +/* + * 32 * (EID_MAX_LEN + 1) + 32 (31 * ";" + 1 * " ") + jetty_id + '\n' + '\0' + + * "server_id:, client_jetty_id:" + */ +#define CLIENT_INFO_BUF_MAX_LEN ((MAX_NODE_NUM + 1) * (EID_MAX_LEN + 1) + JETTY_ID_MAX_LEN + 35) + +struct 
ubcore_dev_list { + struct ubcore_device *dev; + struct list_head list; +}; + +LIST_HEAD(ub_dev_list_head); + +static struct ubcore_jfc_cfg default_jfc_cfg = { + .depth = MAX_JFC_DEPTH, + .flag.bs.lock_free = false, + .flag.bs.jfc_inline = false, + .ceqn = 0, +}; + +static struct ubcore_jfr_cfg default_jfr_cfg = { + .depth = MAX_JFR_DEPTH, + .flag.bs.token_policy = UBCORE_TOKEN_NONE, + .flag.bs.lock_free = false, + .flag.bs.tag_matching = false, + .trans_mode = UBCORE_TP_RM, + .max_sge = MAX_SGE, + .min_rnr_timer = MIN_RNR_TIMER, +}; + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][urma]: " fmt + +struct sentry_ubcore_resource { + bool is_created; + + /* dev resource */ + struct ubcore_device *sentry_ubcore_dev; + struct ubcore_tjetty *tjetty[MAX_NODE_NUM]; + struct ubcore_jfs_wr jfs_wr[MAX_NODE_NUM]; + struct ubcore_jfr_wr jfr_wr[MAX_NODE_NUM]; + struct ubcore_sge s_sge[MAX_NODE_NUM]; + struct ubcore_sge r_sge[MAX_NODE_NUM]; + struct ubcore_jetty *jetty; + struct ubcore_jfc *sender_jfc; + struct ubcore_jfc *receiver_jfc; + struct ubcore_jfr *jetty_jfr; + struct ubcore_target_seg *s_seg; + struct ubcore_target_seg *r_seg; + void *s_seg_va; + void *r_seg_va; + + /* eid info */ + union ubcore_eid local_eid; + union ubcore_eid server_eid[MAX_NODE_NUM]; + char server_eid_array[MAX_NODE_NUM][EID_MAX_LEN]; + int server_eid_valid_num; + uint32_t eid_index; + + /* cnt for retry */ + atomic_t send_cnt[MAX_NODE_NUM]; + atomic_t remote_recv_cnt[MAX_NODE_NUM]; + atomic_t urma_hb_ack_list[MAX_NODE_NUM]; /* 0 = down, 1 = up */ +}; + +struct sentry_urma_context { + /* Heartbeat threads and state */ + struct task_struct *hb_thread; + bool heartbeat_enable; + + uint32_t client_jetty_id; + int local_eid_num_configured; + int server_eid_num_configured; + bool is_panic_mode; + + char *kbuf; /* server_buf client_jetty_id */ + char *server_buf_part; + char *client_jetty_id_part; + char *client_info_buf; /* for proc_read */ + bool is_valid_client_info; + + struct ubcore_cr *update_recv_cnt_cr; + struct ubcore_cr *heartbeat_thread_cr; + struct ubcore_cr *urma_recv_cr; + struct ubcore_cr *urma_recv_sender_cr; + + bool is_register_ubcore_client; + + struct proc_dir_entry *proc_dir; +}; + +static DEFINE_MUTEX(sentry_urma_mutex); +static struct sentry_ubcore_resource sentry_urma_dev[MAX_DIE_NUM]; +static struct sentry_urma_context sentry_urma_ctx; + +bool g_is_created_ubcore_resource; +EXPORT_SYMBOL(g_is_created_ubcore_resource); + +/** + * urma_mutex_lock_op - Lock or unlock the URMA mutex based on panic mode + * @is_to_lock: URMA_LOCK to lock, URMA_UNLOCK to unlock + * + * This function handles mutex locking/unlocking only when not in panic mode + * to avoid deadlocks during system panic. + */ +static void urma_mutex_lock_op(int is_to_lock) +{ + if (!sentry_urma_ctx.is_panic_mode) { + if (is_to_lock) + mutex_lock(&sentry_urma_mutex); + else + mutex_unlock(&sentry_urma_mutex); + } +} + +/** + * swap_eid_byteorder - Swap byte order of EID + * @dst: Destination EID buffer + * @src: Source EID buffer + * + * This function swaps the byte order of EID from big-endian to little-endian. 
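+ *
+ * Illustration (values are made up): with src = { 0x01, 0x02, ..., 0x10 },
+ * the result satisfies dst[i] == src[UBCORE_EID_SIZE - 1 - i], so dst
+ * begins with 0x10 and ends with 0x01.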
+ */ +static inline void swap_eid_byteorder(uint8_t dst[UBCORE_EID_SIZE], + const uint8_t src[UBCORE_EID_SIZE]) +{ + int i; + + for (i = 0; i < UBCORE_EID_SIZE; i++) + dst[i] = src[UBCORE_EID_SIZE - 1 - i]; +} + +/** + * compare_ubcore_eid - Compare two URMA EIDs with byte order handling + * @src_eid: Source EID to compare + * @dst_eid: Destination EID to compare against + * + * Return: 0 if EIDs match, -EINVAL if they don't match even after byte order swap + * + * This function compares two EIDs and handles potential byte order differences + * by attempting a byte-swapped comparison if the initial comparison fails. + */ +static int compare_ubcore_eid(const union ubcore_eid src_eid, + const union ubcore_eid dst_eid) +{ + if (memcmp(&src_eid, &dst_eid, sizeof(union ubcore_eid)) == 0) + return 0; + + /* + * The byte order of the saved data may differ; + * compare again after conversion. + */ + union ubcore_eid new_src_eid; + + swap_eid_byteorder(new_src_eid.raw, src_eid.raw); + if (memcmp(&new_src_eid, &dst_eid, sizeof(union ubcore_eid)) == 0) { + pr_info("change byte order to match success, src eid:%llx, %x, %x, new src eid: %llx, %x, %x\n", + src_eid.in4.reserved, src_eid.in4.prefix, src_eid.in4.addr, + new_src_eid.in4.reserved, new_src_eid.in4.prefix, + new_src_eid.in4.addr); + return 0; + } + pr_err("match eid failed, src eid:%llx, %x, %x, dst eid: %llx, %x, %x\n", + src_eid.in4.reserved, src_eid.in4.prefix, src_eid.in4.addr, + dst_eid.in4.reserved, dst_eid.in4.prefix, dst_eid.in4.addr); + return -EINVAL; +} + +/** + * sentry_add_device - Add URMA device to the device list + * @dev: URMA device to add + * + * Return: 0 on success, -ENOMEM on memory allocation failure + * + * This function allocates and initializes a device node and adds it to + * the global URMA device list. + */ +static int sentry_add_device(struct ubcore_device *dev) +{ + struct ubcore_dev_list *dev_node; + + dev_node = kmalloc(sizeof(*dev_node), GFP_KERNEL); + if (!dev_node) { + pr_err("failed to allocate dev node\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&dev_node->list); + dev_node->dev = dev; + list_add_tail(&dev_node->list, &ub_dev_list_head); + + return 0; +} + +/** + * sentry_remove_device - Remove URMA device from the device list + * @dev: URMA device to remove + * @d: Unused parameter + * + * This function searches for the specified device in the global list + * and removes it, freeing the associated memory. + */ +static void sentry_remove_device(struct ubcore_device *dev, void *d __always_unused) +{ + struct ubcore_dev_list *dev_node; + + list_for_each_entry(dev_node, &ub_dev_list_head, list) { + if (dev_node->dev == dev) { + list_del(&dev_node->list); + kfree(dev_node); + break; + } + } +} + +static struct ubcore_client sentry_ubcore_client = { + .list_node = LIST_HEAD_INIT(sentry_ubcore_client.list_node), + .client_name = "sentry_ubcore_client", + .add = sentry_add_device, + .remove = sentry_remove_device, +}; + +/** + * free_global_char - Free all dynamically allocated global character buffers + * + * This function safely frees all global character buffers used in the module + * and sets the pointers to NULL to prevent use-after-free. 
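+ *
+ * kfree() tolerates NULL pointers, so this function is also safe as the
+ * error path of a partially completed init_global_char().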
+ */ +void free_global_char(void) +{ + kfree(sentry_urma_ctx.kbuf); + sentry_urma_ctx.kbuf = NULL; + + kfree(sentry_urma_ctx.server_buf_part); + sentry_urma_ctx.server_buf_part = NULL; + + kfree(sentry_urma_ctx.client_jetty_id_part); + sentry_urma_ctx.client_jetty_id_part = NULL; + + kfree(sentry_urma_ctx.client_info_buf); + sentry_urma_ctx.client_info_buf = NULL; + + kfree(sentry_urma_ctx.update_recv_cnt_cr); + sentry_urma_ctx.update_recv_cnt_cr = NULL; + + kfree(sentry_urma_ctx.heartbeat_thread_cr); + sentry_urma_ctx.heartbeat_thread_cr = NULL; + + kfree(sentry_urma_ctx.urma_recv_cr); + sentry_urma_ctx.urma_recv_cr = NULL; + + kfree(sentry_urma_ctx.urma_recv_sender_cr); + sentry_urma_ctx.urma_recv_sender_cr = NULL; +} + +/** + * unimport_tjetty - Unimport all target jetties for a specific die + * @die_index: Index of the die to unimport jetties from + * + * Return: 0 on success, -EINVAL on invalid die_index + * + * This function unimports all target jetties associated with a specific die + * index and cleans up the references. + */ +static int unimport_tjetty(int die_index) +{ + int i; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + for (i = 0; i < MAX_NODE_NUM; i++) { + if (sentry_urma_dev[die_index].tjetty[i]) { + ubcore_unimport_jetty(sentry_urma_dev[die_index].tjetty[i]); + sentry_urma_dev[die_index].tjetty[i] = NULL; + } + } + + return 0; +} + +/** + * init_global_char - Initialize global character buffers + * + * Return: 0 on success, -ENOMEM on allocation failure + * + * This function allocates and initializes all global character buffers + * used for client information storage and communication. + */ +int init_global_char(void) +{ + sentry_urma_ctx.kbuf = kzalloc(CLIENT_INFO_MAX_LEN, GFP_KERNEL); + if (!sentry_urma_ctx.kbuf) { + pr_err("kzalloc kbuf failed\n"); + goto err_free; + } + + sentry_urma_ctx.server_buf_part = kzalloc(SERVER_EID_PART_MAX_LEN, GFP_KERNEL); + if (!sentry_urma_ctx.server_buf_part) { + pr_err("kzalloc server_buf_part failed\n"); + goto err_free; + } + + sentry_urma_ctx.client_jetty_id_part = kzalloc(JETTY_ID_MAX_LEN, GFP_KERNEL); + if (!sentry_urma_ctx.client_jetty_id_part) { + pr_err("kzalloc client_jetty_id_part failed\n"); + goto err_free; + } + + sentry_urma_ctx.client_info_buf = kzalloc(CLIENT_INFO_BUF_MAX_LEN, GFP_KERNEL); + if (!sentry_urma_ctx.client_info_buf) { + pr_err("kzalloc client_info_buf failed\n"); + goto err_free; + } + + sentry_urma_ctx.update_recv_cnt_cr = kzalloc(sizeof(struct ubcore_cr) * MAX_NODE_NUM, GFP_KERNEL); + if (!sentry_urma_ctx.update_recv_cnt_cr) { + pr_err("kzalloc update_recv_cnt_cr failed\n"); + goto err_free; + } + sentry_urma_ctx.heartbeat_thread_cr = kzalloc(sizeof(struct ubcore_cr) * MAX_NODE_NUM, GFP_KERNEL); + if (!sentry_urma_ctx.heartbeat_thread_cr) { + pr_err("kzalloc heartbeat_thread_cr failed\n"); + goto err_free; + } + sentry_urma_ctx.urma_recv_cr = kzalloc(sizeof(struct ubcore_cr) * MAX_NODE_NUM, GFP_KERNEL); + if (!sentry_urma_ctx.urma_recv_cr) { + pr_err("kzalloc urma_recv_cr failed\n"); + goto err_free; + } + sentry_urma_ctx.urma_recv_sender_cr = kzalloc(sizeof(struct ubcore_cr) * MAX_NODE_NUM, GFP_KERNEL); + if (!sentry_urma_ctx.urma_recv_sender_cr) { + pr_err("kzalloc urma_recv_sender_cr failed\n"); + goto err_free; + } + + return 0; + +err_free: + free_global_char(); + return -ENOMEM; +} + +/** + * init_ubcore - Initialize URMA core functionality + * + * Return: 0 on success, appropriate 
error code on failure + * + * This function registers the URMA client and verifies that at least one + * URMA device is available. It handles the initialization of URMA core + * components. + */ +int init_ubcore(void) +{ + int ret; + + if (!list_empty(&ub_dev_list_head)) { + pr_err("hw_clear is already setup\n"); + return -EEXIST; + } + + ret = ubcore_register_client(&sentry_ubcore_client); + if (ret) { + pr_err("fail to register ubcore client\n"); + return -EFAULT; + } + + sentry_urma_ctx.is_register_ubcore_client = true; + pr_info("ubcore_register_client success\n"); + + if (list_empty(&ub_dev_list_head)) { + pr_err("fail to get ubcore device\n"); + ret = -ENODEV; + goto init_ubcore_fail; + } + + return 0; + +init_ubcore_fail: + ubcore_unregister_client(&sentry_ubcore_client); + sentry_urma_ctx.is_register_ubcore_client = false; + return ret; +} + +/** + * release_ubcore_resource - Release all URMA resources for all dies + * + * This function stops the heartbeat thread and releases all URMA resources + * including jetties, segments, JFRs, and JFCs for all die indices. + * It handles resource cleanup in the proper order to avoid dependency issues. + */ +static void release_ubcore_resource(void) +{ + int die_index; + + urma_mutex_lock_op(URMA_LOCK); + + if (sentry_urma_ctx.hb_thread) { + kthread_stop(sentry_urma_ctx.hb_thread); + sentry_urma_ctx.hb_thread = NULL; + pr_info("urma_hb_all thread stopped\n"); + } + + g_is_created_ubcore_resource = false; + + /* Release resources for each die */ + for (die_index = 0; die_index < MAX_DIE_NUM; die_index++) { + unimport_tjetty(die_index); + + if (sentry_urma_dev[die_index].jetty) { + ubcore_delete_jetty(sentry_urma_dev[die_index].jetty); + sentry_urma_dev[die_index].jetty = NULL; + } + + if (sentry_urma_dev[die_index].s_seg) { + ubcore_unregister_seg(sentry_urma_dev[die_index].s_seg); + sentry_urma_dev[die_index].s_seg = NULL; + kfree(sentry_urma_dev[die_index].s_seg_va); + sentry_urma_dev[die_index].s_seg_va = NULL; + } + + if (sentry_urma_dev[die_index].r_seg) { + ubcore_unregister_seg(sentry_urma_dev[die_index].r_seg); + sentry_urma_dev[die_index].r_seg = NULL; + kfree(sentry_urma_dev[die_index].r_seg_va); + sentry_urma_dev[die_index].r_seg_va = NULL; + } + + if (sentry_urma_dev[die_index].jetty_jfr) { + ubcore_delete_jfr(sentry_urma_dev[die_index].jetty_jfr); + sentry_urma_dev[die_index].jetty_jfr = NULL; + } + + if (sentry_urma_dev[die_index].receiver_jfc) { + ubcore_delete_jfc(sentry_urma_dev[die_index].receiver_jfc); + sentry_urma_dev[die_index].receiver_jfc = NULL; + } + + if (sentry_urma_dev[die_index].sender_jfc) { + ubcore_delete_jfc(sentry_urma_dev[die_index].sender_jfc); + sentry_urma_dev[die_index].sender_jfc = NULL; + } + + sentry_urma_dev[die_index].sentry_ubcore_dev = NULL; + sentry_urma_dev[die_index].is_created = false; + } + + urma_mutex_lock_op(URMA_UNLOCK); +} + +/** + * release_all_resource - Release all URMA resources and unregister client + * + * This function cleans up all allocated URMA resources including device + * resources and unregisters the URMA client if it was registered. 
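+ *
+ * Teardown mirrors setup order: per-die resources are released first via
+ * release_ubcore_resource(), then the ubcore client registered by
+ * init_ubcore() is unregistered.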
+ */
+static void release_all_resource(void)
+{
+	release_ubcore_resource();
+
+	if (sentry_urma_ctx.is_register_ubcore_client) {
+		ubcore_unregister_client(&sentry_ubcore_client);
+		sentry_urma_ctx.is_register_ubcore_client = false;
+	}
+}
+
+/**
+ * str_to_eid - Convert string representation to URMA EID
+ * @eid_str: String representation of EID
+ * @eid: Pointer to store converted EID
+ *
+ * Return: 0 on success, -EINVAL on invalid input
+ *
+ * This function converts a string representation of an EID to the binary
+ * format used by URMA, supporting IPv6 notation.
+ */
+int str_to_eid(const char *eid_str, union ubcore_eid *eid)
+{
+	if (strlen(eid_str) != EID_MAX_LEN - 1) {
+		pr_err("eid str %s len is invalid, failed to convert\n", eid_str);
+		return -EINVAL;
+	}
+
+	if (in6_pton(eid_str, EID_MAX_LEN, (u8 *)eid, '\0', NULL) > 0) {
+		pr_info("parse eid success, config eid: %llx, %x, %x\n",
+			eid->in4.reserved, eid->in4.prefix, eid->in4.addr);
+		return 0;
+	}
+
+	pr_err("parse eid string [%s] failed\n", eid_str);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(str_to_eid);
+
+/**
+ * set_urma_panic_mode - Set URMA panic mode status
+ * @is_panic: true when entering panic mode, false otherwise
+ *
+ * This function sets the panic mode flag, which affects mutex locking
+ * behavior during system panic conditions.
+ */
+void set_urma_panic_mode(bool is_panic)
+{
+	sentry_urma_ctx.is_panic_mode = is_panic;
+}
+EXPORT_SYMBOL(set_urma_panic_mode);
+
+/**
+ * sentry_register_seg - Register a segment for URMA operations
+ * @dev: URMA device to register segment with
+ * @num_sge: Number of scatter-gather elements
+ * @is_send: true for send segment, false for receive segment
+ * @die_index: Index of the die for resource tracking
+ *
+ * Return: Pointer to registered segment on success, ERR_PTR on failure
+ *
+ * This function registers a memory segment with the URMA device for
+ * send or receive operations.
+ */
+static struct ubcore_target_seg *sentry_register_seg(struct ubcore_device *dev,
+						     uint32_t num_sge, bool is_send,
+						     int die_index)
+{
+	union ubcore_reg_seg_flag flag = {0};
+	uint64_t seg_len = SGE_MAX_LEN * num_sge;
+	struct ubcore_seg_cfg cfg = {0};
+	struct ubcore_target_seg *ret;
+	void *seg_va;
+
+	if (die_index < 0 || die_index >= MAX_DIE_NUM) {
+		pr_err("invalid die_index (%d), range is [0, %d]\n",
+		       die_index, MAX_DIE_NUM - 1);
+		return ERR_PTR(-EINVAL);
+	}
+
+	seg_va = kzalloc(seg_len, GFP_KERNEL);
+	if (!seg_va)
+		return ERR_PTR(-ENOMEM);
+
+	flag.bs.token_policy = UBCORE_TOKEN_NONE;
+	flag.bs.cacheable = UBCORE_NON_CACHEABLE;
+	flag.bs.access = UBCORE_ACCESS_LOCAL_ONLY;
+	cfg.va = (uint64_t)seg_va;
+	cfg.len = seg_len;
+	cfg.flag = flag;
+
+	ret = ubcore_register_seg(dev, &cfg, NULL);
+	if (IS_ERR_OR_NULL(ret)) {
+		pr_err("reg seg failed\n");
+		goto free_seg;
+	}
+
+	if (is_send)
+		sentry_urma_dev[die_index].s_seg_va = seg_va;
+	else
+		sentry_urma_dev[die_index].r_seg_va = seg_va;
+
+	return ret;
+
+free_seg:
+	kfree(seg_va);
+	return ret;
+}
+
+/**
+ * sentry_create_jetty - Create a URMA jetty endpoint
+ * @device: URMA device to create jetty on
+ * @jfc_s: Send completion queue
+ * @jfc_r: Receive completion queue
+ * @jfr: Receive work queue
+ * @jetty_id: Jetty identifier
+ *
+ * Return: Pointer to created jetty on success, NULL on failure
+ *
+ * This function creates a jetty endpoint with the specified configuration
+ * for URMA communication.
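+ *
+ * Typical call (abbreviated from import() below):
+ *
+ *	jetty = sentry_create_jetty(dev, sender_jfc, receiver_jfc,
+ *				    jetty_jfr, client_jetty_id);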
+ */ +static struct ubcore_jetty *sentry_create_jetty(struct ubcore_device *device, + struct ubcore_jfc *jfc_s, + struct ubcore_jfc *jfc_r, + struct ubcore_jfr *jfr, + uint32_t jetty_id) +{ + struct ubcore_jetty_cfg jetty_cfg = { + .id = jetty_id, + .flag.bs.share_jfr = 1, + .trans_mode = UBCORE_TP_RM, + .eid_index = 0, + .jfs_depth = MAX_JFS_DEPTH, + .priority = 0, /* Highest priority */ + .max_send_sge = 1, + .max_send_rsge = 1, + .jfr_depth = MAX_JFR_DEPTH, + .max_recv_sge = 1, + .send_jfc = jfc_s, + .recv_jfc = jfc_r, + .jfr = jfr, + }; + + return ubcore_create_jetty(device, &jetty_cfg, NULL, NULL); +} + +/** + * sentry_post_recv - Post a receive work request to a jetty + * @r_jetty: Receive jetty to post to + * @recv_seg: Receive segment to use + * @node_idx: Node index for scatter-gather element + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure + * + * This function posts a receive work request to the specified jetty + * for asynchronous data reception. + */ +int sentry_post_recv(struct ubcore_jetty *r_jetty, struct ubcore_target_seg *recv_seg, + int node_idx, int die_index) +{ + uint64_t sge_addr; + struct ubcore_jfr_wr *jfr_bad_wr = NULL; + int ret; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + sge_addr = (uint64_t)sentry_urma_dev[die_index].r_seg_va + SGE_MAX_LEN * node_idx; + sentry_urma_dev[die_index].r_sge[node_idx].addr = sge_addr; + sentry_urma_dev[die_index].r_sge[node_idx].len = SGE_MAX_LEN; + sentry_urma_dev[die_index].r_sge[node_idx].tseg = recv_seg; + sentry_urma_dev[die_index].jfr_wr[node_idx].src.sge = + &sentry_urma_dev[die_index].r_sge[node_idx]; + sentry_urma_dev[die_index].jfr_wr[node_idx].src.num_sge = 1; + sentry_urma_dev[die_index].jfr_wr[node_idx].user_ctx = sge_addr; + + ret = ubcore_post_jetty_recv_wr(r_jetty, + &sentry_urma_dev[die_index].jfr_wr[node_idx], + &jfr_bad_wr); + if (ret != 0 && ret != -ENOMEM) { + pr_err("sentry_post_recv: ubcore_post_jetty_recv_wr failed, ret %d\n", ret); + return ret; + } + + return 0; +} + +/** + * create_ubcore_resource - Create URMA core resources for a specific die + * @die_index: Index of the die to create resources for + * + * Return: 0 on success, negative error code on failure + * + * This function creates all necessary URMA resources including JFCs, JFRs, + * segments, and jetties for the specified die index. 
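+ *
+ * Creation order is: sender JFC, receiver JFC, JFR (bound to the receiver
+ * JFC), send segment, receive segment; any failure unwinds through
+ * release_ubcore_resource().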
+ */ +static int create_ubcore_resource(int die_index) +{ + int ret; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + urma_mutex_lock_op(URMA_LOCK); + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + urma_mutex_lock_op(URMA_UNLOCK); + pr_err("Please set eid first\n"); + return -EINVAL; + } + + /* Create sender JFC */ + sentry_urma_dev[die_index].sender_jfc = + ubcore_create_jfc(sentry_urma_dev[die_index].sentry_ubcore_dev, + &default_jfc_cfg, NULL, NULL, NULL); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].sender_jfc)) { + pr_err("ubcore_create_jfc err\n"); + sentry_urma_dev[die_index].sender_jfc = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + + ret = ubcore_rearm_jfc(sentry_urma_dev[die_index].sender_jfc, false); + if (ret != 0) { + pr_err("rearm jfc_r failed, ret %d\n", ret); + goto err_create_urma_resource; + } + pr_info("ubcore_create_jfc success\n"); + + /* Create receiver JFC */ + sentry_urma_dev[die_index].receiver_jfc = + ubcore_create_jfc(sentry_urma_dev[die_index].sentry_ubcore_dev, + &default_jfc_cfg, NULL, NULL, NULL); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].receiver_jfc)) { + pr_err("ubcore_create_jfc err\n"); + sentry_urma_dev[die_index].receiver_jfc = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + + ret = ubcore_rearm_jfc(sentry_urma_dev[die_index].receiver_jfc, false); + if (ret != 0) { + pr_err("rearm jfc_r failed, ret %d\n", ret); + goto err_create_urma_resource; + } + pr_info("ubcore_create_jfc success\n"); + + /* Create JFR */ + default_jfr_cfg.eid_index = sentry_urma_dev[die_index].eid_index; + default_jfr_cfg.jfc = sentry_urma_dev[die_index].receiver_jfc; + sentry_urma_dev[die_index].jetty_jfr = + ubcore_create_jfr(sentry_urma_dev[die_index].sentry_ubcore_dev, + &default_jfr_cfg, NULL, NULL); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].jetty_jfr)) { + pr_err("ubcore_create_jfr err\n"); + sentry_urma_dev[die_index].jetty_jfr = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + pr_info("ubcore_create_jfr success\n"); + + /* Register send segment */ + sentry_urma_dev[die_index].s_seg = + sentry_register_seg(sentry_urma_dev[die_index].sentry_ubcore_dev, + MAX_NODE_NUM, true, die_index); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].s_seg)) { + pr_err("ubcore_register_s_seg err\n"); + sentry_urma_dev[die_index].s_seg = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + + /* Register receive segment */ + sentry_urma_dev[die_index].r_seg = + sentry_register_seg(sentry_urma_dev[die_index].sentry_ubcore_dev, + MAX_NODE_NUM, false, die_index); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].r_seg)) { + pr_err("ubcore_register_r_seg err\n"); + sentry_urma_dev[die_index].r_seg = NULL; + ret = -EFAULT; + goto err_create_urma_resource; + } + + sentry_urma_dev[die_index].is_created = true; + pr_info("ubcore_register_seg success\n"); + urma_mutex_lock_op(URMA_UNLOCK); + + return 0; + +err_create_urma_resource: + urma_mutex_lock_op(URMA_UNLOCK); + release_ubcore_resource(); + return ret; +} + +/** + * create_tjetty - Create a target jetty for remote communication + * @tjetty_cfg: Target jetty configuration + * @eid_index: EID index for the target + * @die_index: Die index for resource access + * + * Return: Pointer to created target jetty on success, NULL on failure + * + * This function creates a target jetty for communication with a remote + * endpoint specified by the EID index. 
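+ *
+ * The import happens in two steps: ubcore_get_tp_list() resolves a
+ * transport handle for the local/peer EID pair, then
+ * ubcore_import_jetty_ex() imports the remote jetty over that handle.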
+ */ +static struct ubcore_tjetty *create_tjetty(struct ubcore_tjetty_cfg *tjetty_cfg, + int eid_index, int die_index) +{ + int ret; + struct ubcore_get_tp_cfg tp_cfg = { + .flag.bs.ctp = 1, + .trans_mode = UBCORE_TP_RM, + .local_eid = sentry_urma_dev[die_index].local_eid, + .peer_eid = sentry_urma_dev[die_index].server_eid[eid_index], + }; + uint32_t tp_cnt = 1; + struct ubcore_tp_info tp_list = {}; + struct ubcore_active_tp_cfg active_tp_cfg = {}; + + ret = ubcore_get_tp_list(sentry_urma_dev[die_index].sentry_ubcore_dev, + &tp_cfg, &tp_cnt, &tp_list, NULL); + if (ret != 0) { + pr_err("ubcore_get_tp_list failed, ret %d, server eid %s\n", + ret, sentry_urma_dev[die_index].server_eid_array[eid_index]); + return NULL; + } + + active_tp_cfg.tp_handle = tp_list.tp_handle; + return ubcore_import_jetty_ex(sentry_urma_dev[die_index].sentry_ubcore_dev, + tjetty_cfg, &active_tp_cfg, NULL); +} + +/** + * import - Import and configure URMA jetties for all dies + * + * Return: 0 on success, -EFAULT on failure + * + * This function imports and configures URMA jetties for all configured dies, + * creates local jetties, posts receive work requests, and starts the heartbeat + * thread if enabled. It handles the complete initialization of URMA communication + * endpoints. + */ +int import(void) +{ + struct ubcore_tjetty_cfg tjetty_cfg = {0}; + int ret = 0; + int die_index; + int tjetty_valid_num; + + if (sentry_urma_ctx.client_jetty_id == DEFAULT_INVALID_JETTY_ID) { + pr_err("client_jetty_id not set, import failed\n"); + return -EFAULT; + } + + urma_mutex_lock_op(URMA_LOCK); + + g_is_created_ubcore_resource = false; + + /* Stop existing heartbeat thread */ + if (sentry_urma_ctx.hb_thread) { + kthread_stop(sentry_urma_ctx.hb_thread); + sentry_urma_ctx.hb_thread = NULL; + pr_info("urma_hb_all thread stopped\n"); + } + + /* Configure target jetty */ + tjetty_cfg.id.id = sentry_urma_ctx.client_jetty_id; + tjetty_cfg.flag.bs.token_policy = UBCORE_TOKEN_NONE; + tjetty_cfg.trans_mode = UBCORE_TP_RM; + tjetty_cfg.type = UBCORE_JETTY; + + /* Process each die */ + for (die_index = 0; die_index < sentry_urma_ctx.server_eid_num_configured; die_index++) { + int i; + + tjetty_valid_num = 0; + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("Please set eid first\n"); + goto print_import_result; + } + + /* Clean existing jetties */ + unimport_tjetty(die_index); + if (sentry_urma_dev[die_index].jetty) { + ubcore_delete_jetty(sentry_urma_dev[die_index].jetty); + sentry_urma_dev[die_index].jetty = NULL; + } + + /* Create local jetty */ + sentry_urma_dev[die_index].jetty = + sentry_create_jetty(sentry_urma_dev[die_index].sentry_ubcore_dev, + sentry_urma_dev[die_index].sender_jfc, + sentry_urma_dev[die_index].receiver_jfc, + sentry_urma_dev[die_index].jetty_jfr, + sentry_urma_ctx.client_jetty_id); + if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].jetty)) { + sentry_urma_dev[die_index].jetty = NULL; + pr_err("ubcore_create_jetty failed for device %s\n", + sentry_urma_dev[die_index].sentry_ubcore_dev->dev_name); + goto print_import_result; + } + pr_info("ubcore_create_jetty success for device %s\n", + sentry_urma_dev[die_index].sentry_ubcore_dev->dev_name); + + /* Post receive work requests */ + for (i = 0; i < MAX_NODE_NUM; i++) { + ret = sentry_post_recv(sentry_urma_dev[die_index].jetty, + sentry_urma_dev[die_index].r_seg, i, die_index); + if (ret != 0) { + pr_err("No. 
%d post recv failed, device %s ret %d\n", i,
+				       sentry_urma_dev[die_index].sentry_ubcore_dev->dev_name, ret);
+				ubcore_delete_jetty(sentry_urma_dev[die_index].jetty);
+				sentry_urma_dev[die_index].jetty = NULL;
+				goto print_import_result;
+			}
+		}
+
+		g_is_created_ubcore_resource = true;
+
+		/* Import target jetties for remote servers (skip local EID at index 0) */
+		for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) {
+			tjetty_cfg.id.eid = sentry_urma_dev[die_index].server_eid[i];
+			sentry_urma_dev[die_index].tjetty[i] =
+				create_tjetty(&tjetty_cfg, i, die_index);
+			if (IS_ERR_OR_NULL(sentry_urma_dev[die_index].tjetty[i])) {
+				pr_warn("ubcore_import_jetty_ex err, server eid %s\n",
+					sentry_urma_dev[die_index].server_eid_array[i]);
+				sentry_urma_dev[die_index].tjetty[i] = NULL;
+				continue;
+			}
+			tjetty_valid_num++;
+		}
+
+print_import_result:
+		/* The device pointer is still NULL when we arrive from the early error path */
+		if (sentry_urma_dev[die_index].sentry_ubcore_dev)
+			pr_info("import: %d/%d success for device %s\n",
+				tjetty_valid_num,
+				sentry_urma_dev[die_index].server_eid_valid_num - 1, /* Exclude local EID */
+				sentry_urma_dev[die_index].sentry_ubcore_dev->dev_name);
+	}
+
+	/* Start heartbeat thread if enabled */
+	if (sentry_urma_ctx.heartbeat_enable) {
+		sentry_urma_ctx.hb_thread = kthread_run(heartbeat_thread, NULL, "urma_hb_all");
+		if (IS_ERR(sentry_urma_ctx.hb_thread)) {
+			pr_err("failed to start heartbeat thread\n");
+			sentry_urma_ctx.hb_thread = NULL;
+		} else {
+			pr_info("urma_hb_all thread start success\n");
+		}
+	}
+
+	urma_mutex_lock_op(URMA_UNLOCK);
+	return g_is_created_ubcore_resource ? 0 : -EFAULT;
+}
+
+/**
+ * match_dev_by_local_eid - Find URMA device matching the specified local EID
+ * @eid: Local EID to match
+ * @eid_index: Output parameter for EID index
+ *
+ * Return: Pointer to matching URMA device, NULL if not found
+ *
+ * This function searches through all registered URMA devices to find one
+ * that has an EID matching the specified local EID.
+ */
+static struct ubcore_device *match_dev_by_local_eid(const union ubcore_eid *eid,
+						    uint32_t *eid_index)
+{
+	int cnt = 0;
+	struct ubcore_dev_list *dev_node;
+
+	list_for_each_entry(dev_node, &ub_dev_list_head, list) {
+		struct ubcore_eid_info *eid_info = ubcore_get_eid_list(dev_node->dev, &cnt);
+		int i;
+
+		if (IS_ERR_OR_NULL(eid_info)) {
+			pr_warn("ubcore_get_eid_list failed\n");
+			continue;
+		}
+
+		/* One device may have multiple EIDs */
+		for (i = 0; i < cnt; i++) {
+			pr_info("eid_info->eid: %llx, %x, %x, try to match\n",
+				eid_info->eid.in4.reserved, eid_info->eid.in4.prefix,
+				eid_info->eid.in4.addr);
+
+			if (compare_ubcore_eid(eid_info->eid, *eid) == 0) {
+				pr_info("Match device %s, use it to send/recv data\n",
+					dev_node->dev->dev_name);
+				*eid_index = eid_info->eid_index;
+				return dev_node->dev;
+			}
+			eid_info++;
+		}
+	}
+
+	pr_err("Cannot find dev by eid: %llx, %x, %x\n",
+	       eid->in4.reserved, eid->in4.prefix, eid->in4.addr);
+	return NULL;
+}
+
+/**
+ * match_index_by_remote_ub_eid - Find node and die indices by remote EID
+ * @remote_eid: Remote EID to search for
+ * @node_index: Output parameter for node index
+ * @die_index: Input/Output parameter for die index
+ *
+ * Return: 0 on success, -EINVAL if not found
+ *
+ * This function searches for a remote EID across all configured dies and nodes.
+ * If die_index is -1 on input, it will be set to the found die index.
+ * If die_index is specified, it verifies consistency.
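+ *
+ * Example (sketch; remote_eid is assumed held by the caller, -1 asks the
+ * function to auto-detect the die):
+ *
+ *	int node_idx = -1, die_idx = -1;
+ *
+ *	if (!match_index_by_remote_ub_eid(remote_eid, &node_idx, &die_idx))
+ *		pr_info("eid found at node %d on die %d\n", node_idx, die_idx);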
+ */ +int match_index_by_remote_ub_eid(union ubcore_eid remote_eid, int *node_index, int *die_index) +{ + int i, j; + + for (i = 0; i < sentry_urma_ctx.local_eid_num_configured; i++) { + if (!sentry_urma_dev[i].is_created) { + pr_err("invalid value for sentry_urma_dev[%d].is_created\n", i); + return -EINVAL; + } + + for (j = 0; j < sentry_urma_dev[i].server_eid_valid_num; j++) { + if (memcmp(&sentry_urma_dev[i].server_eid[j], &remote_eid, + sizeof(union ubcore_eid)) == 0) { + *node_index = j; + if (*die_index == -1) { + *die_index = i; + } else if (*die_index != i) { + pr_err("%s error, get die_index %d, input die_index %d\n", + __func__, i, *die_index); + return -1; + } + return 0; + } + } + } + + return -EINVAL; +} +EXPORT_SYMBOL(match_index_by_remote_ub_eid); + +/** + * sentry_create_urma_resource - Create URMA resources for specified EIDs + * @eid: Array of local EIDs to create resources for + * @eid_num: Number of EIDs in the array + * + * Return: 0 on success, negative error code on failure + * + * This function initializes URMA core, creates resources for each specified EID, + * and matches devices to the provided EIDs. It handles both initial setup and + * reconfiguration scenarios. + */ +int sentry_create_urma_resource(union ubcore_eid eid[], int eid_num) +{ + int ret; + bool is_the_same = true; + union ubcore_eid initial_value = {0}; + int i; + + /* Prepare for new device matching by cleaning up old resources */ + release_all_resource(); + + ret = init_ubcore(); + if (ret) { + pr_err("ubcore init failed\n"); + return -EINVAL; + } + pr_info("ubcore init success\n"); + + /* Check if the current EID configuration is the same as previous */ + for (i = 0; i < MAX_DIE_NUM; i++) { + if (memcmp(&eid[i], &initial_value, sizeof(union ubcore_eid)) == 0) + break; + + /* + * Settings are considered changed in two scenarios: + * 1. This is a new setting (no previous value exists). + * 2. A previous value exists, but the new value is different. + */ + if ((sentry_urma_dev[i].is_created && + memcmp(&sentry_urma_dev[i].local_eid, &eid[i], + sizeof(union ubcore_eid)) != 0) || + !sentry_urma_dev[i].is_created) { + is_the_same = false; + break; + } + } + + if (is_the_same) { + pr_info("New eid is the same with current eid, skip to create new resource\n"); + return 0; + } + + /* Create resources for each EID */ + for (i = 0; i < eid_num; i++) { + sentry_urma_dev[i].sentry_ubcore_dev = + match_dev_by_local_eid(&eid[i], &sentry_urma_dev[i].eid_index); + if (IS_ERR_OR_NULL(sentry_urma_dev[i].sentry_ubcore_dev)) + return -EINVAL; + + /* Re-create new URMA resource (e.g., jfs/jfc/jfr/seg) */ + ret = create_ubcore_resource(i); + if (ret) { + pr_err("create_ubcore_resource failed for %llx, %x, %x\n", + eid[i].in4.reserved, eid[i].in4.prefix, eid[i].in4.addr); + release_ubcore_resource(); + return ret; + } + + /* Update URMA EID after successful resource creation */ + memcpy(&sentry_urma_dev[i].local_eid, &eid[i], sizeof(union ubcore_eid)); + } + + sentry_urma_ctx.local_eid_num_configured = eid_num; + return 0; +} +EXPORT_SYMBOL(sentry_create_urma_resource); + +/** + * format_client_info_show_str - Format client information for display + * + * This function formats the client information string for procfs display, + * including server EIDs and client jetty ID in a human-readable format. 
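+ *
+ * Resulting layout (sketch; two dies, EID values illustrative):
+ *
+ *	server_eid:<eid00>,<eid01>;<eid10>,<eid11>, client_jetty_id:<id>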
+ */
+static void format_client_info_show_str(void)
+{
+	bool is_not_single_die = false;
+	char *p;
+	int i, j;
+
+	/* Clean up old data */
+	if (sentry_urma_ctx.client_info_buf && sentry_urma_ctx.is_valid_client_info)
+		memset(sentry_urma_ctx.client_info_buf, 0, CLIENT_INFO_BUF_MAX_LEN);
+
+	if (sentry_urma_ctx.is_valid_client_info) {
+		p = sentry_urma_ctx.client_info_buf;
+
+		for (i = 0; i < sentry_urma_ctx.local_eid_num_configured; i++) {
+			if (!sentry_urma_dev[i].is_created) {
+				pr_err("invalid value for sentry_urma_dev[%d].is_created\n", i);
+				break;
+			}
+
+			/* Always size snprintf() by the space left in the buffer */
+			if (is_not_single_die)
+				p += snprintf(p, CLIENT_INFO_BUF_MAX_LEN - (p - sentry_urma_ctx.client_info_buf),
+					      "%s", ";");
+			else
+				p += snprintf(p, CLIENT_INFO_BUF_MAX_LEN - (p - sentry_urma_ctx.client_info_buf),
+					      "%s", "server_eid:");
+
+			for (j = 0; j < sentry_urma_dev[i].server_eid_valid_num; j++) {
+				p += snprintf(p, CLIENT_INFO_BUF_MAX_LEN - (p - sentry_urma_ctx.client_info_buf),
+					      "%s%s", sentry_urma_dev[i].server_eid_array[j],
+					      j != sentry_urma_dev[i].server_eid_valid_num - 1 ? "," : "");
+			}
+			is_not_single_die = true;
+		}
+
+		snprintf(p, CLIENT_INFO_BUF_MAX_LEN - (p - sentry_urma_ctx.client_info_buf),
+			 ", client_jetty_id:%d\n", sentry_urma_ctx.client_jetty_id);
+	} else {
+		snprintf(sentry_urma_ctx.client_info_buf, CLIENT_INFO_BUF_MAX_LEN,
+			 "server_eid:%s, client_jetty_id:%d\n", "null", DEFAULT_INVALID_JETTY_ID);
+	}
+}
+
+/**
+ * process_multi_eid_string - Process multiple EID strings from a buffer
+ * @eid_buf: Buffer containing EID strings
+ * @eid_array: Output array for EID strings
+ * @eid_tmp: Output array for parsed EIDs
+ * @sepstr: Separator string for tokenizing
+ * @eid_max_num: Maximum number of EIDs to process
+ *
+ * Return: Number of EIDs processed on success, negative error code on failure
+ *
+ * This function parses a buffer containing multiple EID strings separated by
+ * the specified separator and converts them to binary EID format.
+ */
+int process_multi_eid_string(char *eid_buf, char eid_array[][EID_MAX_LEN],
+			     union ubcore_eid eid_tmp[], const char *sepstr, int eid_max_num)
+{
+	int ret;
+	int eid_num = 0;
+	char *eid_part;
+
+	while ((eid_part = strsep(&eid_buf, sepstr)) != NULL) {
+		if (eid_num >= eid_max_num) {
+			pr_err("Invalid eid format: max num %d, current input exceeds\n",
+			       eid_max_num);
+			return -EINVAL;
+		}
+
+		/* >= keeps room for the terminating NUL in eid_array[] */
+		if (strlen(eid_part) >= EID_MAX_LEN) {
+			pr_err("Invalid eid format: str too long: %s\n", eid_part);
+			return -EINVAL;
+		}
+
+		ret = str_to_eid(eid_part, &eid_tmp[eid_num]);
+		if (ret) {
+			pr_err("Invalid eid format: eid str %s\n", eid_part);
+			return -EINVAL;
+		}
+
+		strscpy(eid_array[eid_num], eid_part, EID_MAX_LEN);
+		eid_num++;
+	}
+
+	return eid_num;
+}
+EXPORT_SYMBOL(process_multi_eid_string);
+
+/**
+ * process_server_eid_str - Process server EID string for multiple dies
+ * @server_buf: Buffer containing server EID strings
+ * @server_ub_eid_tmp: Output array for parsed server EIDs
+ * @server_eid_valid_num: Output array for valid EID counts per die
+ *
+ * Return: 0 on success, negative error code on failure
+ *
+ * This function processes server EID strings for multiple dies, validating
+ * that local EIDs match the configured values.
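+ *
+ * Expected input layout (sketch): per-die groups are separated by ';',
+ * EIDs within a die by ',', and the first EID of each group must be that
+ * die's local EID:
+ *
+ *	<local_eid0>,<peer_eid>,...;<local_eid1>,<peer_eid>,...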
+ */ +static int process_server_eid_str(char *server_buf, + union ubcore_eid server_ub_eid_tmp[MAX_DIE_NUM][MAX_NODE_NUM], + int *server_eid_valid_num) +{ + int ret; + int die_index = 0; + char *single_server_eid_part; + + while ((single_server_eid_part = strsep(&server_buf, ";")) != NULL) { + if (die_index >= MAX_DIE_NUM) { + pr_err("Invalid eid format: max num %d, current input exceeds\n", + MAX_DIE_NUM); + return -EINVAL; + } + + if (strlen(single_server_eid_part) > SINGLE_SERVER_PART_LEN) { + pr_err("Invalid server eid format: str too long: %s\n", + single_server_eid_part); + return -EINVAL; + } + + ret = process_multi_eid_string(single_server_eid_part, + sentry_urma_dev[die_index].server_eid_array, + server_ub_eid_tmp[die_index], ",", MAX_NODE_NUM); + if (ret < 0) + return ret; + + server_eid_valid_num[die_index] = ret; + + /* Verify local EID in server EID matches configured EID */ + if (memcmp(&server_ub_eid_tmp[die_index][0], + &sentry_urma_dev[die_index].local_eid, + sizeof(union ubcore_eid)) != 0) { + pr_err("Error: local eid in server eid %llx%llx does not match configured eid %llx%llx\n", + server_ub_eid_tmp[die_index][0].in6.subnet_prefix, + server_ub_eid_tmp[die_index][0].in6.interface_id, + sentry_urma_dev[die_index].local_eid.in6.subnet_prefix, + sentry_urma_dev[die_index].local_eid.in6.interface_id); + return -EINVAL; + } + die_index++; + } + + return 0; +} + +/** + * proc_client_info_write - Write handler for client info proc file + * @file: proc file pointer + * @user_buf: user space buffer + * @count: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + * + * This function processes client information input from userspace, including + * server EIDs and client jetty ID, and configures the URMA resources accordingly. + */ +static ssize_t proc_client_info_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int n = 0; + int ret; + union ubcore_eid server_ub_eid_tmp[MAX_DIE_NUM][MAX_NODE_NUM]; + int server_eid_valid_num[MAX_DIE_NUM] = {0}; + uint32_t client_jetty_id; + int i; + + if (count > CLIENT_INFO_MAX_LEN - 1) { + pr_err("invalid server eid info, max len %d, actual %lu\n", + CLIENT_INFO_MAX_LEN - 1, count); + return -EINVAL; + } + + if (copy_from_user(sentry_urma_ctx.kbuf, user_buf, count)) { + pr_err("failed parse client info input: copy_from_user failed\n"); + return -EFAULT; + } + sentry_urma_ctx.kbuf[count] = '\0'; + pr_info("proc_client_info_write kbuf is %s\n", sentry_urma_ctx.kbuf); + + /* + * Parse server EID part and client jetty ID part + * ((39 + 1) * 32 - 1) * 2 + 1 = 2559 + */ + ret = sscanf(sentry_urma_ctx.kbuf, "%2559[^ ] %6[^\n]%n", + sentry_urma_ctx.server_buf_part, + sentry_urma_ctx.client_jetty_id_part, + &n); + if (ret != 2) { + pr_err("Invalid msg str format and parse client info failed! 
str [%s]\n", + sentry_urma_ctx.kbuf); + return -EINVAL; + } + + /* Process server EIDs */ + ret = process_server_eid_str(sentry_urma_ctx.server_buf_part, + server_ub_eid_tmp, server_eid_valid_num); + if (ret) + return ret; + + /* Determine number of configured server EIDs */ + for (i = 0; i < MAX_DIE_NUM; i++) { + if (server_eid_valid_num[i] == 0) + break; + sentry_urma_ctx.server_eid_num_configured = i + 1; + } + + if (sentry_urma_ctx.server_eid_num_configured > + sentry_urma_ctx.local_eid_num_configured) { + pr_err("server eid num %d > local eid num %d\n", + sentry_urma_ctx.server_eid_num_configured, + sentry_urma_ctx.local_eid_num_configured); + return -EINVAL; + } + + /* Process client jetty ID */ + ret = kstrtou32(sentry_urma_ctx.client_jetty_id_part, 10, &client_jetty_id); + if (ret < 0) { + pr_err("Invalid format for client_jetty_id, str %s\n", + sentry_urma_ctx.client_jetty_id_part); + return -EINVAL; + } + + if (client_jetty_id < MIN_JETTY_ID || client_jetty_id > MAX_JETTY_ID) { + pr_err("client_jetty_id %u out of range [%d, %d]\n", + client_jetty_id, MIN_JETTY_ID, MAX_JETTY_ID); + return -EINVAL; + } + pr_info("client_jetty_id is %u\n", client_jetty_id); + + /* Update global configuration */ + sentry_urma_ctx.is_valid_client_info = true; + sentry_urma_ctx.client_jetty_id = client_jetty_id; + + for (i = 0; i < MAX_DIE_NUM; i++) { + memcpy(sentry_urma_dev[i].server_eid, server_ub_eid_tmp[i], + sizeof(union ubcore_eid) * MAX_NODE_NUM); + sentry_urma_dev[i].server_eid_valid_num = server_eid_valid_num[i]; + } + + /* Import URMA resources */ + ret = import(); + if (ret != 0) { + pr_err("ubcore import failed\n"); + return -EINVAL; + } + + return count; +} + +/** + * proc_client_info_show - Read handler for client info proc file + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + * + * This function displays the current client configuration including server EIDs + * and client jetty ID in a human-readable format. + */ +static ssize_t proc_client_info_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + format_client_info_show_str(); + return simple_read_from_buffer(buf, count, ppos, + sentry_urma_ctx.client_info_buf, + strlen(sentry_urma_ctx.client_info_buf)); +} + +static const struct proc_ops proc_client_info_file_operations = { + .proc_read = proc_client_info_show, + .proc_write = proc_client_info_write, +}; + +/** + * proc_heartbeat_write - Write handler for heartbeat control proc file + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + * + * This function controls the heartbeat thread based on user input ("on" or "off"). + * It starts or stops the heartbeat monitoring thread accordingly. 
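+ *
+ * Example (sketch; the actual path expands PROC_DEVICE_PATH and
+ * PROC_HEARTBEAT_SWITCH):
+ *
+ *	echo on > /proc/<PROC_DEVICE_PATH>/<PROC_HEARTBEAT_SWITCH>
+ *	echo off > /proc/<PROC_DEVICE_PATH>/<PROC_HEARTBEAT_SWITCH>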
+ */ +static ssize_t proc_heartbeat_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for /proc/%s/%s, only 'off' or 'on' allowed\n", + PROC_DEVICE_PATH, PROC_HEARTBEAT_SWITCH); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set /proc/%s/%s failed\n", PROC_DEVICE_PATH, PROC_HEARTBEAT_SWITCH); + return -EFAULT; + } + + /* Remove trailing newline if present */ + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + if (!g_is_created_ubcore_resource) { + sentry_urma_ctx.heartbeat_enable = false; + pr_warn("Failed to start heartbeat: local eid not set\n"); + return -EINVAL; + } + + sentry_urma_ctx.hb_thread = kthread_run(heartbeat_thread, NULL, "urma_hb_all"); + if (IS_ERR(sentry_urma_ctx.hb_thread)) { + sentry_urma_ctx.heartbeat_enable = false; + pr_err("failed to start heartbeat thread\n"); + sentry_urma_ctx.hb_thread = NULL; + return -EINVAL; + } + sentry_urma_ctx.heartbeat_enable = true; + pr_info("heartbeat thread enabled\n"); + + } else if (strcmp(enable_str, "off") == 0) { + sentry_urma_ctx.heartbeat_enable = false; + pr_info("heartbeat thread disabled\n"); + + if (sentry_urma_ctx.hb_thread) { + kthread_stop(sentry_urma_ctx.hb_thread); + sentry_urma_ctx.hb_thread = NULL; + } + } else { + pr_err("invalid value for /proc/%s/%s\n", + PROC_DEVICE_PATH, PROC_HEARTBEAT_SWITCH); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_heartbeat_show - Read handler for heartbeat control proc file + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + * + * This function displays the current heartbeat thread status ("on" or "off"). + */ +static ssize_t proc_heartbeat_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + const char *status = sentry_urma_ctx.heartbeat_enable ? "on" : "off"; + size_t len = sentry_urma_ctx.heartbeat_enable ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +static const struct proc_ops proc_heartbeat_file_operations = { + .proc_read = proc_heartbeat_show, + .proc_write = proc_heartbeat_write, +}; + +/** + * heartbeat_thread - Heartbeat monitoring thread function + * @arg: thread argument (unused) + * + * Return: 0 on thread exit + * + * This function implements the heartbeat monitoring mechanism for URMA nodes. + * It periodically sends heartbeat messages, checks for acknowledgments, and + * attempts to rebuild connections to unresponsive nodes. 
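+ *
+ * Per-round outline (sketch):
+ *
+ *	clear ack flags -> send HEARTBEAT to every peer -> sleep ->
+ *	poll the sender JFC for acks -> rebuild_tjetty() for silent peers ->
+ *	re-send to rebuilt peers and re-check -> pad out the interval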
+ */
+static int heartbeat_thread(void *arg)
+{
+	int i, cnt;
+	int die_index;
+	uint64_t start_time;
+
+	while (!kthread_should_stop()) {
+		if (!sentry_urma_ctx.heartbeat_enable) {
+			msleep_interruptible(HB_WAIT_ACK_SLEEP_MS);
+			continue;
+		}
+
+		start_time = ktime_get_ns();
+
+		/* Reset heartbeat acknowledgment status for all nodes */
+		for (die_index = 0; die_index < MAX_DIE_NUM; die_index++) {
+			for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++)
+				atomic_set(&sentry_urma_dev[die_index].urma_hb_ack_list[i], 0);
+		}
+		pr_info("start to detect heartbeat\n");
+
+		/* Send heartbeat to inactive nodes */
+		for (die_index = 0; die_index < MAX_DIE_NUM; die_index++) {
+			bool need_rebuild[MAX_NODE_NUM] = {false};
+			bool rebuilt = false;
+
+			if (!sentry_urma_dev[die_index].is_created)
+				break;
+
+			/* sentry_urma_dev[die_index].server_eid_array[0] is local_eid */
+			for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) {
+				pr_info("send heartbeat to node %d (eid=%s)\n", i,
+					sentry_urma_dev[die_index].server_eid_array[i]);
+				sentry_post_jetty_send_wr(HEARTBEAT, strlen(HEARTBEAT) + 1, i, die_index);
+			}
+
+			msleep_interruptible(HB_WAIT_ACK_SLEEP_MS);
+
+			/* Check for heartbeat acknowledgments */
+			if (!sentry_urma_ctx.is_panic_mode &&
+			    !mutex_trylock(&sentry_urma_mutex))
+				continue;
+
+			memset(sentry_urma_ctx.heartbeat_thread_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM);
+			cnt = sentry_poll_jfc(sentry_urma_dev[die_index].sender_jfc,
+					      MAX_NODE_NUM, sentry_urma_ctx.heartbeat_thread_cr, die_index);
+			urma_mutex_lock_op(URMA_UNLOCK);
+
+			if (cnt > 0) {
+				for (int k = 0; k < cnt; k++)
+					pr_info("heartbeat cr[%d].status=%d\n", k, sentry_urma_ctx.heartbeat_thread_cr[k].status);
+			}
+
+			/* Check final heartbeat result and rebuild if needed */
+			for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) {
+				if (!atomic_read(&sentry_urma_dev[die_index].urma_hb_ack_list[i])) {
+					/* Link down, try to rebuild link */
+					pr_info("Failed to detect heartbeat of node %d (eid=%s), start rebuild link\n",
+						i, sentry_urma_dev[die_index].server_eid_array[i]);
+					if (rebuild_tjetty(i, die_index) == 0) {
+						pr_info("after rebuild, retry heartbeat for node %d (eid=%s)\n",
+							i, sentry_urma_dev[die_index].server_eid_array[i]);
+						sentry_post_jetty_send_wr(HEARTBEAT, strlen(HEARTBEAT) + 1,
+									  i, die_index);
+						need_rebuild[i] = true;
+						rebuilt = true;
+					}
+				} else {
+					pr_info("succeed to detect heartbeat of node %d (eid=%s)\n",
+						i, sentry_urma_dev[die_index].server_eid_array[i]);
+				}
+			}
+
+			/* Verify rebuilt connections */
+			if (rebuilt) {
+				msleep_interruptible(HB_WAIT_ACK_SLEEP_MS);
+				/* heartbeat_thread_cr holds MAX_NODE_NUM entries, size the clear explicitly */
+				memset(sentry_urma_ctx.heartbeat_thread_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM);
+
+				if (!sentry_urma_ctx.is_panic_mode &&
+				    !mutex_trylock(&sentry_urma_mutex))
+					continue;
+
+				sentry_poll_jfc(sentry_urma_dev[die_index].sender_jfc,
+						MAX_NODE_NUM, sentry_urma_ctx.heartbeat_thread_cr, die_index);
+				urma_mutex_lock_op(URMA_UNLOCK);
+
+				pr_info("check rebuilt node heartbeat\n");
+				for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) {
+					if (!need_rebuild[i])
+						continue;
+
+					pr_info("node[%s] heartbeat recover %s\n",
+						sentry_urma_dev[die_index].server_eid_array[i],
+						!atomic_read(&sentry_urma_dev[die_index].urma_hb_ack_list[i]) ?
+ "failed" : "success"); + } + } + } + + /* Calculate sleep time to maintain heartbeat interval */ + int msleep_time = HEARTBEAT_INTERVAL_MS - + (int)((ktime_get_ns() - start_time) / NSEC_PER_MSEC); + + if (msleep_time > 0) + msleep_interruptible(msleep_time); + } + + return 0; +} + +/** + * sentry_poll_jfc - Poll completion queue for heartbeat acknowledgments + * @jfc: Jetty completion queue to poll + * @cr_cnt: Maximum number of completions to retrieve + * @cr: Array to store completions + * @die_index: Die index for resource access + * + * Return: Number of completions retrieved, negative on error + * + * This function polls the completion queue for heartbeat acknowledgments + * and updates the remote receive counters for successful completions. + */ +static int sentry_poll_jfc(struct ubcore_jfc *jfc, int cr_cnt, struct ubcore_cr *cr, + int die_index) +{ + int cnt; + int k; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + cnt = ubcore_poll_jfc(jfc, cr_cnt, cr); + if (cnt <= 0) + return cnt; + + /* Process successful completions */ + for (k = 0; k < cnt; k++) { + int idx = -1; + int tmp_die_index = die_index; + + if (cr[k].status == 0) { + match_index_by_remote_ub_eid(cr[k].remote_id.eid, &idx, &tmp_die_index); + if (idx >= 0) + atomic_inc(&sentry_urma_dev[tmp_die_index].remote_recv_cnt[idx]); + } + } + + return cnt; +} + +/** + * update_remote_recv_cnt - Update remote receive counters by polling completion queue + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure + * + * This function polls the sender completion queue to update the remote + * receive counters for the specified die index. + */ +static int update_remote_recv_cnt(int die_index) +{ + int cnt; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + if (!sentry_urma_ctx.is_panic_mode && !mutex_trylock(&sentry_urma_mutex)) + return -EBUSY; + + memset(sentry_urma_ctx.update_recv_cnt_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM); + cnt = sentry_poll_jfc(sentry_urma_dev[die_index].sender_jfc, MAX_NODE_NUM, sentry_urma_ctx.update_recv_cnt_cr, die_index); + urma_mutex_lock_op(URMA_UNLOCK); + + if (cnt < 0) { + pr_err("update_remote_recv_cnt: poll sender_jfc error, ret %d\n", cnt); + return -EFAULT; + } + + return 0; +} + +/** + * rebuild_tjetty - Rebuild a target jetty for a specific node + * @idx: Node index to rebuild + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure + * + * This function rebuilds a target jetty for a specific node when connectivity + * issues are detected. It creates a new tjetty, replaces the old one, and + * resets the send/receive counters. 
+ */ +static int rebuild_tjetty(int idx, int die_index) +{ + struct ubcore_tjetty *tjetty_tmp = NULL; + struct ubcore_tjetty *tjetty_to_clear = NULL; + struct ubcore_tjetty_cfg cfg = { + .id.id = sentry_urma_ctx.client_jetty_id, + .id.eid = sentry_urma_dev[die_index].server_eid[idx], + .trans_mode = UBCORE_TP_RM, + .type = UBCORE_JETTY, + }; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) { + pr_debug("rebuild_tjetty: lock busy, skipping node %d, eid %s\n", + idx, sentry_urma_dev[die_index].server_eid_array[idx]); + return -EBUSY; + } + + tjetty_tmp = create_tjetty(&cfg, idx, die_index); + if (IS_ERR_OR_NULL(tjetty_tmp)) { + urma_mutex_lock_op(URMA_UNLOCK); + pr_err("rebuild_tjetty: tjetty[%d] ubcore_import_jetty_ex err, eid %s\n", + idx, sentry_urma_dev[die_index].server_eid_array[idx]); + return -EFAULT; + } + + /* Replace old tjetty if it exists */ + if (sentry_urma_dev[die_index].tjetty[idx]) + tjetty_to_clear = sentry_urma_dev[die_index].tjetty[idx]; + + sentry_urma_dev[die_index].tjetty[idx] = tjetty_tmp; + + /* Reset counters */ + atomic_set(&sentry_urma_dev[die_index].send_cnt[idx], 0); + atomic_set(&sentry_urma_dev[die_index].remote_recv_cnt[idx], 0); + + /* Clean up old tjetty */ + if (tjetty_to_clear) + ubcore_unimport_jetty(tjetty_to_clear); + + /* Repost receive work request */ + sentry_post_recv(sentry_urma_dev[die_index].jetty, + sentry_urma_dev[die_index].r_seg, idx, die_index); + + urma_mutex_lock_op(URMA_UNLOCK); + pr_info("rebuild_tjetty: tjetty[%d] rebuilt OK\n", idx); + return 0; +} + +/** + * check_and_rebuild_single_tjetty - Check and rebuild tjetty if needed + * @idx: Node index to check + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure or if rebuild not needed + * + * This function checks the send and receive counters for a specific node and + * rebuilds the tjetty if the difference exceeds the rebuild threshold. + * It also handles counter overflow by resetting when they reach maximum values. 
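+ *
+ * Example (sketch): with URMA_REBUILD_THRESHOLD == 3, send_cnt == 7 and
+ * remote_recv_cnt == 3 gives 7 - 3 > 3, so both counters are reset and
+ * rebuild_tjetty() is invoked for the node.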
+ */ +static int check_and_rebuild_single_tjetty(int idx, int die_index) +{ + int ret = 0; + int scnt, rcnt; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + scnt = atomic_read(&sentry_urma_dev[die_index].send_cnt[idx]); + rcnt = atomic_read(&sentry_urma_dev[die_index].remote_recv_cnt[idx]); + + /* Check if rebuild threshold is exceeded */ + if (scnt - rcnt > URMA_REBUILD_THRESHOLD) { + pr_info("tjetty[%d] %s check failed: send_cnt=%d, remote_recv_cnt=%d, rebuild\n", + idx, sentry_urma_dev[die_index].server_eid_array[idx], scnt, rcnt); + /* Reset counters and rebuild */ + atomic_set(&sentry_urma_dev[die_index].send_cnt[idx], 0); + atomic_set(&sentry_urma_dev[die_index].remote_recv_cnt[idx], 0); + ret = rebuild_tjetty(idx, die_index); + } + + /* Handle counter overflow */ + if (scnt > URMA_CNT_MAX_NUM && rcnt > URMA_CNT_MAX_NUM) { + atomic_set(&sentry_urma_dev[die_index].send_cnt[idx], 0); + atomic_set(&sentry_urma_dev[die_index].remote_recv_cnt[idx], 0); + } + + return ret; +} + +/** + * sentry_post_jetty_send_wr - Post a send work request to a jetty + * @buf: Data buffer to send + * @len: Length of data to send + * @tjetty_idx: Target jetty index + * @die_index: Die index for resource access + * + * Return: 0 on success, negative error code on failure + * + * This function posts a send work request to the specified target jetty, + * copying the data to the send segment and updating the send counters. + */ +static int sentry_post_jetty_send_wr(const char *buf, size_t len, int tjetty_idx, + int die_index) +{ + int ret; + struct ubcore_jfs_wr *bad_wr = NULL; + struct ubcore_tjetty *tj_i; + uint64_t s_seg_va_i; + size_t actual_len; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) { + pr_debug("sentry_post_jetty_send_wr: lock busy, skipping %d\n", tjetty_idx); + return 0; + } + + tj_i = sentry_urma_dev[die_index].tjetty[tjetty_idx]; + + if (!sentry_urma_dev[die_index].jetty) { + pr_err("jetty not created! 
Please establish a link first\n"); + urma_mutex_lock_op(URMA_UNLOCK); + return COMM_PARM_NOT_SET; + } + + if (!tj_i) { + urma_mutex_lock_op(URMA_UNLOCK); + return -ENODEV; + } + + /* Configure send work request */ + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].opcode = UBCORE_OPC_SEND; + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].tjetty = tj_i; + s_seg_va_i = (uint64_t)sentry_urma_dev[die_index].s_seg_va + + (SGE_MAX_LEN * tjetty_idx); + + /* Copy data to send segment */ + ret = snprintf((char *)s_seg_va_i, len, "%s", buf); + if ((size_t)ret >= len) { + pr_err("sentry_post_jetty_send_wr: send str size exceeds max\n"); + urma_mutex_lock_op(URMA_UNLOCK); + return -EINVAL; + } + + /* Set up scatter-gather element */ + actual_len = strnlen((char *)s_seg_va_i, len - 1) + 1; + sentry_urma_dev[die_index].s_sge[tjetty_idx].addr = s_seg_va_i; + sentry_urma_dev[die_index].s_sge[tjetty_idx].len = actual_len; + sentry_urma_dev[die_index].s_sge[tjetty_idx].tseg = + sentry_urma_dev[die_index].s_seg; + + /* Configure work request */ + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].send.src.sge = + &sentry_urma_dev[die_index].s_sge[tjetty_idx]; + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].send.src.num_sge = 1; + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].user_ctx = s_seg_va_i; + sentry_urma_dev[die_index].jfs_wr[tjetty_idx].flag.bs.complete_enable = 1; + + /* Post send work request */ + ret = ubcore_post_jetty_send_wr(sentry_urma_dev[die_index].jetty, + &sentry_urma_dev[die_index].jfs_wr[tjetty_idx], + &bad_wr); + if (ret) { + pr_err("ubcore_post_jetty_send_wr err\n"); + } else { + atomic_inc(&sentry_urma_dev[die_index].send_cnt[tjetty_idx]); + pr_info("ubcore_post_jetty_send_wr success\n"); + } + + urma_mutex_lock_op(URMA_UNLOCK); + return ret; +} + +/** + * urma_send_to_all_nodes - Send data to all configured nodes + * @buf: Data buffer to send + * @len: Length of data to send + * @die_index: Die index for resource access + * + * Return: Number of successful sends, negative error code on failure + * + * This function sends data to all configured remote nodes for a specific die, + * performing necessary checks and potential tjetty rebuilds before sending. 
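+ *
+ * Note: server_eid[0] holds the local EID, so the broadcast loop starts
+ * at index 1; a COMM_PARM_NOT_SET return from the send path aborts the
+ * whole broadcast.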
+ */ +static int urma_send_to_all_nodes(const char *buf, size_t len, int die_index) +{ + int cnt = 0; + int i; + + if (!buf || len == 0) + return -EINVAL; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + /* Update remote receive counters */ + if (update_remote_recv_cnt(die_index)) + return -EFAULT; + + /* sentry_urma_dev[die_index].server_eid[0] is local_eid */ + for (i = 1; i < sentry_urma_dev[die_index].server_eid_valid_num; i++) { + int ret = 0; + + /* Check and rebuild tjetty if needed (skip in panic mode) */ + if (!sentry_urma_ctx.is_panic_mode) + ret = check_and_rebuild_single_tjetty(i, die_index); + + if (!ret) { + pr_info("start to send msg [%s] to [%s]\n", buf, + sentry_urma_dev[die_index].server_eid_array[i]); + ret = sentry_post_jetty_send_wr(buf, len, i, die_index); + } + + if (ret == COMM_PARM_NOT_SET) + return COMM_PARM_NOT_SET; + + if (ret == 0) + cnt++; + } + + return cnt; +} + +/** + * urma_send_to_given_node - Send data to a specific node + * @buf: Data buffer to send + * @len: Length of data to send + * @dst_eid: Destination EID string + * @die_index: Die index for resource access (-1 if unknown) + * + * Return: 1 on successful send, 0 if not sent, negative error code on failure + * + * This function sends data to a specific node identified by EID, performing + * necessary validation and potential tjetty rebuild before sending. + */ +static int urma_send_to_given_node(const char *buf, size_t len, + const char *dst_eid, int die_index) +{ + int cnt = 0; + int ret; + int node_idx = -1; + union ubcore_eid dst_ubcore_eid; + + if (!buf || len == 0 || !dst_eid) + return -EINVAL; + + /* Convert EID string to binary format */ + if (str_to_eid(dst_eid, &dst_ubcore_eid) < 0) { + pr_err("urma_send: invalid dst eid [%s]\n", dst_eid); + return -EINVAL; + } + + /* Find node and die indices */ + match_index_by_remote_ub_eid(dst_ubcore_eid, &node_idx, &die_index); + if (node_idx < 0) { + pr_warn("urma_send: msg format invalid, str [%s]\n", buf); + return 0; + } + + /* Update remote receive counters */ + ret = update_remote_recv_cnt(die_index); + if (ret) + return ret; + + /* Check and rebuild tjetty if needed (skip in panic mode) */ + if (!sentry_urma_ctx.is_panic_mode) + ret = check_and_rebuild_single_tjetty(node_idx, die_index); + + if (!ret) { + pr_info("start to send msg [%s] to [%s]\n", buf, dst_eid); + ret = sentry_post_jetty_send_wr(buf, len, node_idx, die_index); + } + + if (!ret) + cnt++; + + return cnt; +} + +/** + * urma_send - Send data to URMA nodes + * @buf: Data buffer to send + * @len: Length of data to send + * @dst_eid: Destination EID (NULL for broadcast to all nodes) + * @die_index: Die index (-1 for auto-detect, 0/1 for specific die) + * + * Return: Number of successful sends, negative error code on failure + * + * This function provides the main interface for sending data via URMA, + * supporting both broadcast and unicast modes. 
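+ *
+ * Example (sketch; msg and len are assumed to be prepared by the caller):
+ *
+ *	n = urma_send(msg, len, NULL, 0);	/* broadcast on die 0 */
+ *	n = urma_send(msg, len, eid_str, -1);	/* unicast, auto-detect die */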
+ */ +int urma_send(const char *buf, size_t len, const char *dst_eid, int die_index) +{ + int cnt = 0; + + if (!g_is_created_ubcore_resource) + return -ENODEV; + + if (!dst_eid && die_index >= 0) { + /* Broadcast mode: send to all nodes */ + cnt = urma_send_to_all_nodes(buf, len, die_index); + } else { + /* Unicast mode: send to specific node */ + cnt = urma_send_to_given_node(buf, len, dst_eid, die_index); + } + + return cnt; +} +EXPORT_SYMBOL(urma_send); + +/** + * urma_recv - Receive data from URMA nodes + * @buf_arr: Array of buffers to store received messages + * @len: Maximum length for each received message + * + * Return: Number of valid messages received, negative error code on failure + * + * This function polls for incoming messages, handles heartbeat protocol, + * and returns valid event messages to the caller. + */ +int urma_recv(char **buf_arr, size_t len) +{ + int ret; + int valid_msg_num = 0; + char recv_msg[URMA_SEND_DATA_MAX_LEN] = {0}; + int die_index; + + if (!buf_arr) + return -EINVAL; + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + return -EBUSY; + + if (!g_is_created_ubcore_resource) { + urma_mutex_lock_op(URMA_UNLOCK); + return -ENODEV; + } + urma_mutex_lock_op(URMA_UNLOCK); + + /* Check each die for incoming messages */ + for (die_index = 0; die_index < sentry_urma_ctx.local_eid_num_configured; die_index++) { + int cnt; + memset(sentry_urma_ctx.urma_recv_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM); + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + continue; + + if (!sentry_urma_dev[die_index].is_created) { + urma_mutex_lock_op(URMA_UNLOCK); + break; + } + + cnt = ubcore_poll_jfc(sentry_urma_dev[die_index].receiver_jfc, + MAX_NODE_NUM, sentry_urma_ctx.urma_recv_cr); + urma_mutex_lock_op(URMA_UNLOCK); + + if (cnt < 0) { + pr_err("urma_recv: ubcore_poll_jfc failed for eid %s, ret %d\n", + sentry_urma_dev[die_index].server_eid_array[0], cnt); + continue; + } else if (cnt == 0) { + /* No messages available */ + continue; + } + + /* Process each completion */ + for (int i = 0; i < cnt; i++) { + int node_idx = -1; + int tmp_die_index = die_index; + + /* Extract message from completion context */ + ret = snprintf(recv_msg, len, "%s", (char *)sentry_urma_ctx.urma_recv_cr[i].user_ctx); + if ((size_t)ret >= len) { + pr_warn("urma recv: msg size exceeds max len %lu\n", len); + continue; + } + + /* Match remote EID to node index */ + match_index_by_remote_ub_eid(sentry_urma_ctx.urma_recv_cr[i].remote_id.eid, &node_idx, &tmp_die_index); + if (node_idx < 0) { + pr_warn("urma_recv: cr[%d] eid (%llx, %x, %x) not matched, msg [%s]\n", + i, sentry_urma_ctx.urma_recv_cr[i].remote_id.eid.in4.reserved, + sentry_urma_ctx.urma_recv_cr[i].remote_id.eid.in4.prefix, + sentry_urma_ctx.urma_recv_cr[i].remote_id.eid.in4.addr, recv_msg); + continue; + } + + pr_info("urma_recv: cr[%d] get msg [%s] from node[%d] eid=%s\n", + i, recv_msg, node_idx, + sentry_urma_dev[tmp_die_index].server_eid_array[node_idx]); + + /* Handle different message types */ + if (!strcmp(recv_msg, HEARTBEAT)) { + /* Heartbeat request - send acknowledgment */ + pr_info("urma_recv: received heartbeat from node[%d] eid=%s, send ack\n", + node_idx, sentry_urma_dev[tmp_die_index].server_eid_array[node_idx]); + sentry_post_jetty_send_wr(HEARTBEAT_ACK, strlen(HEARTBEAT_ACK) + 1, + node_idx, tmp_die_index); + + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + continue; + + memset(sentry_urma_ctx.urma_recv_sender_cr, 0, sizeof(struct 
ubcore_cr) * MAX_NODE_NUM); + sentry_poll_jfc(sentry_urma_dev[tmp_die_index].sender_jfc, + MAX_NODE_NUM, sentry_urma_ctx.urma_recv_sender_cr, tmp_die_index); + urma_mutex_lock_op(URMA_UNLOCK); + } else if (!strcmp(recv_msg, HEARTBEAT_ACK)) { + /* Heartbeat acknowledgment - update status */ + pr_info("urma_recv: received heartbeat ack from node[%d] eid=%s\n", + node_idx, sentry_urma_dev[tmp_die_index].server_eid_array[node_idx]); + atomic_set(&sentry_urma_dev[tmp_die_index].urma_hb_ack_list[node_idx], 1); + } else { + /* Event message - store for caller */ + memcpy(buf_arr[valid_msg_num], recv_msg, sizeof(recv_msg)); + valid_msg_num++; + } + + /* Repost receive work request */ + if (!sentry_urma_ctx.is_panic_mode && + !mutex_trylock(&sentry_urma_mutex)) + continue; + + ret = sentry_post_recv(sentry_urma_dev[tmp_die_index].jetty, + sentry_urma_dev[tmp_die_index].r_seg, + node_idx, tmp_die_index); + urma_mutex_lock_op(URMA_UNLOCK); + + if (ret < 0) + pr_warn("urma_recv: sentry_post_recv failed, ret %d\n", ret); + } + } + + return valid_msg_num; +} +EXPORT_SYMBOL(urma_recv); + +/** + * reboot_cleanup_notifier - System reboot notifier callback + * @nb: Notifier block + * @action: Reboot action + * @data: Notifier data + * + * Return: NOTIFY_DONE + * + * This function ensures proper cleanup of URMA resources during system reboot. + */ +static int reboot_cleanup_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + if (action == SYS_RESTART && sentry_urma_ctx.hb_thread) { + kthread_stop(sentry_urma_ctx.hb_thread); + sentry_urma_ctx.hb_thread = NULL; + pr_info("urma_hb_all thread stopped\n"); + } + return NOTIFY_DONE; +} + +static struct notifier_block reboot_cleanup_nb = { + .notifier_call = reboot_cleanup_notifier, + .priority = INT_MAX, +}; + +/** + * sentry_urma_comm_init - Module initialization function + * + * Return: 0 on success, negative error code on failure + * + * This function initializes the URMA communication module, creating proc + * files, allocating buffers, and registering reboot notifier. + */ +static int __init sentry_urma_comm_init(void) +{ + int ret = 0; + + sentry_urma_ctx.proc_dir = proc_mkdir_mode(PROC_DEVICE_PATH, 0550, NULL); + if (!sentry_urma_ctx.proc_dir) { + pr_err("create /proc/%s dir failed\n", PROC_DEVICE_PATH); + return -ENOMEM; + } + + ret |= sentry_create_proc_file(PROC_DEVICE_NAME, sentry_urma_ctx.proc_dir, + &proc_client_info_file_operations); + ret |= sentry_create_proc_file(PROC_HEARTBEAT_SWITCH, sentry_urma_ctx.proc_dir, + &proc_heartbeat_file_operations); + if (ret < 0) + goto remove_proc_dir; + + ret = init_global_char(); + if (ret) + goto remove_proc_dir; + + ret = register_reboot_notifier(&reboot_cleanup_nb); + if (ret) { + pr_info("reboot_cleanup_nb register failed: %d\n", ret); + goto free_mem; + } + + pr_info("reboot_cleanup_nb registered\n"); + return 0; + +free_mem: + free_global_char(); +remove_proc_dir: + proc_remove(sentry_urma_ctx.proc_dir); + return ret; +} + +/** + * sentry_urma_comm_exit - Module cleanup function + * + * This function cleans up all URMA resources, stops threads, and removes + * proc files during module unload. 
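+ *
+ * Teardown order (sketch): unregister the reboot notifier -> stop the
+ * heartbeat thread -> release URMA resources -> remove proc entries ->
+ * free the global buffers.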
+ */
+static void __exit sentry_urma_comm_exit(void)
+{
+	unregister_reboot_notifier(&reboot_cleanup_nb);
+	pr_info("reboot_cleanup_nb unregistered\n");
+
+	if (sentry_urma_ctx.hb_thread) {
+		kthread_stop(sentry_urma_ctx.hb_thread);
+		sentry_urma_ctx.hb_thread = NULL;
+		pr_info("urma_hb_all thread stopped\n");
+	}
+
+	release_all_resource();
+
+	if (sentry_urma_ctx.proc_dir)
+		proc_remove(sentry_urma_ctx.proc_dir);
+
+	pr_info("ubcore release\n");
+	free_global_char();
+}
+
+module_init(sentry_urma_comm_init);
+module_exit(sentry_urma_comm_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("luckky");
+MODULE_DESCRIPTION("Kernel module to transport msg via URMA");
diff --git a/drivers/ub/sentry/smh_common_type.h b/drivers/ub/sentry/smh_common_type.h
index 9ae54a2b0a43..5b90543c72e7 100644
--- a/drivers/ub/sentry/smh_common_type.h
+++ b/drivers/ub/sentry/smh_common_type.h
@@ -16,9 +16,16 @@
 #include
 #define SMH_TYPE ('}')
+#define MAX_DIE_NUM 2
 #define OOM_EVENT_MAX_NUMA_NODES 8
+#define MAX_NODE_NUM 32
+#define EID_MAX_LEN 40 // eid str len 39 + '\0'
 #define REPORT_COMM_TIME 5000
+#define URMA_SEND_DATA_MAX_LEN (2 + EID_MAX_LEN * 2 + 10 + 1 + 4) // type_cna_eid_randomID_res
 #define MILLISECONDS_OF_EACH_MDELAY 1000
+#define COMM_PARM_NOT_SET (-2)
+#define HEARTBEAT "heartbeat"
+#define HEARTBEAT_ACK "heartbeat_ack"
 #define ENABLE_VALUE_MAX_LEN 4 // 'off' + '\0'
 #define URMA_REBUILD_THRESHOLD 3
--
Gitee

From 49e2189b585cf21e2f8bd1fda8b219f34b595fde Mon Sep 17 00:00:00 2001
From: shixuantong
Date: Mon, 10 Nov 2025 20:55:31 +0800
Subject: [PATCH 27/48] drivers/ub/sentry: add sentry_uvb_comm module

commit 662401f649f9a0ff394dfd226173d6e2d945256d openEuler

Provides UVB communication functionality; it depends on the cis and odf modules.

Signed-off-by: guodashun
Signed-off-by: shixuantong
Signed-off-by: shixuantong
---
 drivers/ub/sentry/Makefile          |   1 +
 drivers/ub/sentry/sentry_uvb_comm.c | 214 ++++++++++++++++++++++++++++
 drivers/ub/sentry/smh_common_type.h |   8 ++
 3 files changed, 223 insertions(+)
 create mode 100644 drivers/ub/sentry/sentry_uvb_comm.c

diff --git a/drivers/ub/sentry/Makefile b/drivers/ub/sentry/Makefile
index f4d4464a552b..249bec765991 100644
--- a/drivers/ub/sentry/Makefile
+++ b/drivers/ub/sentry/Makefile
@@ -5,5 +5,6 @@
 obj-$(CONFIG_UB_SENTRY) += sentry_msg_helper.o
 obj-$(CONFIG_UB_SENTRY) += sentry_reporter.o
 obj-$(CONFIG_UB_SENTRY_REMOTE) += sentry_urma_comm.o
+obj-$(CONFIG_UB_SENTRY_REMOTE) += sentry_uvb_comm.o
 sentry_msg_helper-y := smh_core.o smh_message.o
diff --git a/drivers/ub/sentry/sentry_uvb_comm.c b/drivers/ub/sentry/sentry_uvb_comm.c
new file mode 100644
index 000000000000..ef6af702263d
--- /dev/null
+++ b/drivers/ub/sentry/sentry_uvb_comm.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * Description: support UVB communication + * Author: sxt1001 + * Create: 2025-04-23 + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "smh_common_type.h" +#include "smh_message.h" + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][uvb]: " fmt + +uint32_t g_local_cna = -1; +EXPORT_SYMBOL(g_local_cna); + +static struct proc_dir_entry *uvb_proc_dir; +static char *g_kbuf_server_cna; // cna1;cna2;cna3...cnan +uint32_t g_server_cna_array[MAX_NODE_NUM]; +int g_server_cna_valid_num; +EXPORT_SYMBOL(g_server_cna_array); +EXPORT_SYMBOL(g_server_cna_valid_num); + +/* + * @brief send data to server by UVB + * + * @param1: Data to be sent + * @param2: Indicates the CNA information of the specified server. + * If dst_cna is greater than CNA_MAX_VALUE, no server is + * specified. In this case, data needs to be sent to all nodes. + * @param3: UVB mode. If env is in panic status, We need to use + * synchronization mode, set is_sync to true. + * @return Number of nodes that are successfully sent + * */ +int uvb_send(const char *str, uint32_t dst_cna, bool is_sync) +{ + int res, cnt = 0; + + struct cis_message msg; + + msg.input = (char *)str; + msg.input_size = strlen(str) + 1; + msg.output = NULL; + msg.p_output_size = NULL; + + if (dst_cna < CNA_MAX_VALUE) { // dst cna is valid, send data to specific node + res = cis_call_by_uvb(UBIOS_CALL_ID_PANIC_CALL, UVB_SENDER_ID_SYSSENTRY, + UVB_RECEIVER_ID_SYSSENTRY(dst_cna), &msg, is_sync); + if (res != 0) { + pr_err("Send to a specified node, cis_call_by_uvb send data %s to %u failed.\n", str, dst_cna); + return -1; + } + cnt++; + pr_info("Send to a specified node, cis_call_by_uvb send data %s to %u success.\n", str, dst_cna); + return cnt; + } + + // dst_cna is invalid, send data to all nodes. + for (int i = 0; i < g_server_cna_valid_num; i++) { + if (g_server_cna_array[i] < CNA_MAX_VALUE) { + pr_info("Broadcast mode. 
receiver cna is %d, received id is %#x.\n", g_server_cna_array[i], + UVB_RECEIVER_ID_SYSSENTRY(g_server_cna_array[i])); + res = cis_call_by_uvb(UBIOS_CALL_ID_PANIC_CALL, UVB_SENDER_ID_SYSSENTRY, + UVB_RECEIVER_ID_SYSSENTRY(g_server_cna_array[i]), &msg, is_sync); + if (res != 0) { + pr_err("cis_call_by_uvb send data %s to %u failed.\n", str, g_server_cna_array[i]); + continue; + } + pr_info("cis_call_by_uvb send data %s to %u success.\n", str, g_server_cna_array[i]); + cnt++; + } + } + return cnt; +} +EXPORT_SYMBOL(uvb_send); + +static int convert_server_cna_str_to_u32_array(const char *server_cna) +{ + int server_cna_valid_num = 0, ret = 0; + uint32_t server_cna_array[MAX_NODE_NUM]; + char *token; + + char *server_cna_copy = kstrdup(server_cna, GFP_KERNEL); + char *rest = server_cna_copy; + + while ((token = strsep(&rest, ";"))) { + if (server_cna_valid_num >= MAX_NODE_NUM) { + pr_err("Invalid format for server_cna: cna max num is %d, the current input server_cna exceeds %d nodes.\n", MAX_NODE_NUM, MAX_NODE_NUM); + kfree(server_cna_copy); + return -EINVAL; + } + if (*token != '\0') { + ret = kstrtou32(token, 10, &server_cna_array[server_cna_valid_num]); + if (ret < 0) { + pr_err("Invalid format for server cna, str is %s\n", token); + kfree(server_cna_copy); + return -EINVAL; + } + if (server_cna_array[server_cna_valid_num] > CNA_MAX_VALUE) { + pr_err("Found invalid cna (%s), it should not be greater than %d\n", token, CNA_MAX_VALUE); + kfree(server_cna_copy); + return -EINVAL; + } + ++server_cna_valid_num; + } + } + pr_info("server cna num is %d\n", server_cna_valid_num); + + kfree(server_cna_copy); + + // input server_cna is valid, start to update global variables such as g_server_cna_valid_num and g_server_cna_array + g_server_cna_valid_num = server_cna_valid_num; + for (int i = 0; i < g_server_cna_valid_num; i++) { + g_server_cna_array[i] = server_cna_array[i]; + } + return 0; +} + +static ssize_t proc_uvb_server_cna_show(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, g_kbuf_server_cna, strlen(g_kbuf_server_cna)); +} + +static ssize_t proc_uvb_server_cna_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int ret = 0; + char server_cna_buf[(MAX_NODE_NUM + 1) * INTEGER_TO_STR_MAX_LEN]; + + if (count > (MAX_NODE_NUM + 1) * INTEGER_TO_STR_MAX_LEN - 1) { + pr_err("invalid value for server_cna mode.\n"); + return -EINVAL; + } + if (copy_from_user(server_cna_buf, user_buf, count)) { + pr_err("failed parse client info input: copy_from_user failed.\n"); + return -EFAULT; + } + server_cna_buf[count] = '\0'; + pr_info("proc_uvb_server_cna_write server_cna is %s\n", server_cna_buf); + + ret = convert_server_cna_str_to_u32_array(server_cna_buf); + if (ret) { + pr_err("convert_server_cna_str_to_u32_array failed\n"); + return -EINVAL; + } + snprintf(g_kbuf_server_cna, (MAX_NODE_NUM + 1) * INTEGER_TO_STR_MAX_LEN, "%s", server_cna_buf); + return count; +} + +static const struct proc_ops proc_uvb_server_cna_file_operations = { + .proc_read = proc_uvb_server_cna_show, + .proc_write = proc_uvb_server_cna_write, +}; + +static int __init uvb_comm_init(void) +{ + int ret = 0; + + for (int i = 0; i < MAX_NODE_NUM; i++) { + g_server_cna_array[i] = (uint32_t)-1; + } + + uvb_proc_dir = proc_mkdir_mode("sentry_uvb_comm", PROC_DIR_PERMISSION, NULL); + if (!uvb_proc_dir) { + pr_err("create /proc/sentry_uvb_comm dir failed.\n"); + return -ENOMEM; + } + + ret = sentry_create_proc_file("server_cna", 
uvb_proc_dir, &proc_uvb_server_cna_file_operations); + if (ret == -ENOMEM) { + goto remove_uvb_proc_dir; + } + + g_kbuf_server_cna = kzalloc((MAX_NODE_NUM + 1) * INTEGER_TO_STR_MAX_LEN, GFP_KERNEL); + if (!g_kbuf_server_cna) { + pr_err("kzalloc g_kbuf_server_cna failed!\n"); + ret = -ENOMEM; + goto remove_uvb_proc_dir; + } + pr_info("uvb communication is enabled.\n"); + return 0; + +remove_uvb_proc_dir: + proc_remove(uvb_proc_dir); + return ret; +} + +static void __exit uvb_comm_exit(void) +{ + if (uvb_proc_dir) { + proc_remove(uvb_proc_dir); + } + if (g_kbuf_server_cna) { + kfree(g_kbuf_server_cna); + g_kbuf_server_cna = NULL; + } + pr_info("uvb communication module unloaded\n"); +} + +module_init(uvb_comm_init); +module_exit(uvb_comm_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("sxt1001"); +MODULE_DESCRIPTION("Kernel module to send msg via UVB"); diff --git a/drivers/ub/sentry/smh_common_type.h b/drivers/ub/sentry/smh_common_type.h index 5b90543c72e7..fd8ed418928a 100644 --- a/drivers/ub/sentry/smh_common_type.h +++ b/drivers/ub/sentry/smh_common_type.h @@ -9,6 +9,7 @@ #ifndef SMH_COMMON_TYPE_H #define SMH_COMMON_TYPE_H +#include #include #include #include @@ -23,6 +24,8 @@ #define REPORT_COMM_TIME 5000 #define URMA_SEND_DATA_MAX_LEN (2 + EID_MAX_LEN * 2 + 10 + 1 + 4) // type_cna_eid_randomID_res #define MILLISECONDS_OF_EACH_MDELAY 1000 +#define CNA_MAX_VALUE 0xffffff +#define INTEGER_TO_STR_MAX_LEN 22 #define COMM_PARM_NOT_SET (-2) #define HEARTBEAT "heartbeat" #define HEARTBEAT_ACK "heartbeat_ack" @@ -71,6 +74,11 @@ struct sentry_msg_helper_msg { unsigned long res; }; +extern uint32_t g_local_cna; +#define UVB_SENDER_ID_SYSSENTRY_INDEX (g_local_cna) +#define UVB_SENDER_ID_SYSSENTRY (UBIOS_USER_ID_RICH_OS | UVB_SENDER_ID_SYSSENTRY_INDEX) +#define UVB_RECEIVER_ID_SYSSENTRY(cna) (UBIOS_USER_ID_UB_DEVICE | (cna)) + static inline int sentry_create_proc_file(const char *name, struct proc_dir_entry *parent, const struct proc_ops *proc_ops) { -- Gitee From d1ec8fbeb78383047dbcbb5a460bbf69439b9c71 Mon Sep 17 00:00:00 2001 From: shixuantong Date: Sat, 29 Nov 2025 11:57:38 +0800 Subject: [PATCH 28/48] drivers/ub/sentry: add sentry_remote_reporter module commit f1edc5dcf26f382f2b30bd35a504caf5035a9dc8 openEuler 1. report panic event to main node by URMA or UVB 2. 
report reboot event to main node by URMA or UVB Signed-off-by: guodashun Signed-off-by: shixuantong Signed-off-by: shixuantong --- arch/arm64/configs/tencent.config | 1 + drivers/ub/sentry/Makefile | 2 + drivers/ub/sentry/sentry_remote_client.c | 1120 ++++++++++++++++++++ drivers/ub/sentry/sentry_remote_reporter.h | 73 ++ drivers/ub/sentry/sentry_remote_server.c | 445 ++++++++ drivers/ub/sentry/smh_common_type.h | 96 ++ 6 files changed, 1737 insertions(+) create mode 100644 drivers/ub/sentry/sentry_remote_client.c create mode 100644 drivers/ub/sentry/sentry_remote_reporter.h create mode 100644 drivers/ub/sentry/sentry_remote_server.c diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config index f99a133749e3..d5b9de64db60 100644 --- a/arch/arm64/configs/tencent.config +++ b/arch/arm64/configs/tencent.config @@ -1849,6 +1849,7 @@ CONFIG_OBMM=m # UB sentry CONFIG_UB_SENTRY=m +CONFIG_UB_SENTRY_REMOTE=m # end of UB sentry # URMA diff --git a/drivers/ub/sentry/Makefile b/drivers/ub/sentry/Makefile index 249bec765991..0c084b61a2bc 100644 --- a/drivers/ub/sentry/Makefile +++ b/drivers/ub/sentry/Makefile @@ -6,5 +6,7 @@ obj-$(CONFIG_UB_SENTRY) += sentry_msg_helper.o obj-$(CONFIG_UB_SENTRY) += sentry_reporter.o obj-$(CONFIG_UB_SENTRY_REMOTE) += sentry_urma_comm.o obj-$(CONFIG_UB_SENTRY_REMOTE) += sentry_uvb_comm.o +obj-$(CONFIG_UB_SENTRY_REMOTE) += sentry_remote_reporter.o sentry_msg_helper-y := smh_core.o smh_message.o +sentry_remote_reporter-y := sentry_remote_server.o sentry_remote_client.o diff --git a/drivers/ub/sentry/sentry_remote_client.c b/drivers/ub/sentry/sentry_remote_client.c new file mode 100644 index 000000000000..da4b46f12afe --- /dev/null +++ b/drivers/ub/sentry/sentry_remote_client.c @@ -0,0 +1,1120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * Description: Client module, used for reporting panic or reboot events. 
+ * Author: sxt1001 + * Create: 2025-03-18 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smh_message.h" +#include "sentry_remote_reporter.h" + +#define PANIC_TIMEOUT_MS_MIN 0 +#define PANIC_TIMEOUT_MS_MAX 3600000 +#define KERNEL_REBOOT_TIMEOUT_MS_MIN 0 +#define KERNEL_REBOOT_TIMEOUT_MS_MAX 3600000 +#define LOCAL_EID_MAX_LEN (EID_MAX_LEN * 2 + 1 + 1) + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][remote client]: " fmt + +struct sentry_client_context { + char eid_str[MAX_DIE_NUM][EID_MAX_LEN]; + char eid_raw_str[LOCAL_EID_MAX_LEN]; /* for proc show */ + union ubcore_eid eid[MAX_DIE_NUM]; + int die_num_configured; + + struct proc_dir_entry *panic_proc_dir; + char **msg_str; + + unsigned long panic_timeout_ms; + unsigned long kernel_reboot_timeout_ms; + + bool panic_enable; + bool kernel_reboot_enable; + bool use_uvb; + bool use_urma; + + bool is_in_panic_status; + + uint32_t random_id; + + bool is_uvb_cis_func_registered; +}; + +static struct sentry_client_context sentry_client_ctx = { + .die_num_configured = MAX_DIE_NUM, + .panic_timeout_ms = 35000, + .kernel_reboot_timeout_ms = 35000, + .panic_enable = false, + .kernel_reboot_enable = false, + .use_uvb = true, + .use_urma = true, + .is_in_panic_status = false, + .random_id = 0, + .is_uvb_cis_func_registered = false, +}; + +/** + * strcmp_local_eid_from_msg - Compare message EID with local EIDs + * @msg_eid: EID from message to compare + * + * Return: true if EID matches a local EID, false otherwise + * + * This function checks if the provided EID matches any of the + * configured local EIDs. + */ +static bool strcmp_local_eid_from_msg(const char *msg_eid) +{ + for (int i = 0; i < sentry_client_ctx.die_num_configured; i++) { + if (strlen(sentry_client_ctx.eid_str[i]) == 0) { + pr_err("local_eid should have %d values, but %d-th value is empty\n", + sentry_client_ctx.die_num_configured, i); + break; + } + if (strncmp(msg_eid, sentry_client_ctx.eid_str[i], EID_MAX_LEN) == 0) + return true; + } + return false; +} + +/** + * get_ack_done - Check if acknowledgment is complete for local node + * @msg: Message to check + * @ack_type: Expected acknowledgment type + * @comm_type: Communication type + * + * Return: true if acknowledgment is complete, false otherwise + * + * This function verifies if the received acknowledgment message + * matches the expected parameters for the local node. + */ +static bool get_ack_done(const struct sentry_msg_helper_msg *msg, + enum sentry_msg_helper_msg_type ack_type, + enum SENTRY_REMOTE_COMM_TYPE comm_type) +{ + if (msg->type == ack_type && + msg->helper_msg_info.remote_info.cna == g_local_cna && + strcmp_local_eid_from_msg(msg->helper_msg_info.remote_info.eid)) { + pr_info("Receive ack message%s: [%d_%u_%s_%lu]\n", + (comm_type == COMM_TYPE_URMA) ? " from URMA" : + (comm_type == COMM_TYPE_UVB) ? " from UVB" : "", + msg->type, + g_local_cna, + msg->helper_msg_info.remote_info.eid, + msg->res); + return true; + } + return false; +} + +/** + * remote_event_handler - Handle remote event sending and acknowledgment + * @remote_type: Type of remote event + * @timeout_ms: Timeout in milliseconds + * + * Return: 0 on success, negative error code on failure + * + * This function handles the sending of remote events (panic/reboot) and + * waits for acknowledgments from remote nodes, supporting both URMA and UVB. 
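+ *
+ * Example (sketch; the event type name below is illustrative, not an enum
+ * value defined by this patch):
+ *
+ *	ret = remote_event_handler(<panic_msg_type>,
+ *				   sentry_client_ctx.panic_timeout_ms);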
+ */ +int remote_event_handler(enum sentry_msg_helper_msg_type remote_type, + unsigned long timeout_ms) +{ + int ret; + bool uvb_send_success = false; + bool urma_send_success = false; + enum sentry_msg_helper_msg_type remote_ack_type; + char send_data[MAX_DIE_NUM][URMA_SEND_DATA_MAX_LEN]; + uint64_t start_count, current_count; + uint64_t code_run_count, code_run_times_ms; + uint64_t counts_per_sec = arch_timer_get_cntfrq(); + uint64_t timeout_counts = timeout_ms / 1000 * counts_per_sec; + bool ack_done = false; + int recv_msg_nodes; + int times = timeout_ms / MILLISECONDS_OF_EACH_MDELAY; + + /* Prepare send data for each die */ + for (int i = 0; i < sentry_client_ctx.die_num_configured; i++) { + if (strlen(sentry_client_ctx.eid_str[i]) == 0) { + pr_err("local_eid should have %d values, but %d-th value is empty\n", + sentry_client_ctx.die_num_configured, i); + return NOTIFY_OK; + } + + ret = snprintf(send_data[i], URMA_SEND_DATA_MAX_LEN - 1, + "%d_%u_%s_%lu_%u", remote_type, g_local_cna, + sentry_client_ctx.eid_str[i], timeout_ms, + sentry_client_ctx.random_id); + if ((size_t)ret >= URMA_SEND_DATA_MAX_LEN - 1) { + pr_err("msg str size exceeds the max value\n"); + return NOTIFY_OK; + } + } + + remote_ack_type = get_ack_type(remote_type); + if (remote_ack_type == SMH_MESSAGE_UNKNOWN) + return -EINVAL; + + start_count = read_sysreg(cntpct_el0); + + /* Main event sending and acknowledgment loop */ + for (int i = 0; i < times; i++) { + current_count = read_sysreg(cntpct_el0); + if (current_count - start_count >= timeout_counts) + break; + + /* Send via URMA if enabled */ + if (sentry_client_ctx.use_urma) { + for (int j = 0; j < sentry_client_ctx.die_num_configured; j++) { + if (strlen(sentry_client_ctx.eid_str[j]) == 0) + break; + + ret = urma_send(send_data[j], sizeof(send_data[j]), NULL, j); + if (ret > 0) { + urma_send_success = true; + pr_info("URMA send data [%s] [%d]: SUCCESS. die index %d\n", + send_data[j], i + 1, j); + } + } + } + + /* Send via UVB if enabled */ + if (sentry_client_ctx.use_uvb) { + ret = uvb_send(send_data[0], -1, + sentry_client_ctx.is_in_panic_status ? 
true : false); + if (ret > 0) { + uvb_send_success = true; + pr_info("UVB send data [%s] [%d]: SUCCESS\n", send_data[0], i + 1); + } + } + + /* Handle send failure */ + if (!urma_send_success && !uvb_send_success) { + pr_warn("UVB && URMA send data [%s]: FAILED\n", send_data[0]); + if (sentry_client_ctx.is_in_panic_status) + mdelay(MILLISECONDS_OF_EACH_MDELAY); + else + msleep(MILLISECONDS_OF_EACH_MDELAY); + continue; + } + + if (!sentry_client_ctx.is_in_panic_status) { + /* Not in panic status, check shared buffer */ + if (atomic_read(&sentry_remote_ctx.remote_event_ack_done) != 1) { + msleep(MILLISECONDS_OF_EACH_MDELAY); + continue; + } + + spin_lock(&sentry_buf_lock); + ack_done = get_ack_done(&sentry_remote_ctx.remote_event_ack_msg_buf, + remote_ack_type, COMM_TYPE_UNKNOWN); + spin_unlock(&sentry_buf_lock); + continue; + } + /* Handle acknowledgment in panic mode */ + if (uvb_send_success) { + /* In panic status, UVB uses sync mode */ + void *data = NULL; + ret = uvb_polling_sync(data); + + if (ret < 0 && ret != -ETIMEDOUT) { + pr_err("uvb_poll_window_sync failed\n"); + } else if (ret == -ETIMEDOUT) { + pr_info("uvb_polling_sync timeout\n"); + } else if (ret == 0) { + /* uvb_polling_sync success */ + if (atomic_read(&sentry_remote_ctx.remote_event_ack_done) != 1) + goto do_urma_recv; + + spin_lock(&sentry_buf_lock); + ack_done = get_ack_done(&sentry_remote_ctx.remote_event_ack_msg_buf, + remote_ack_type, COMM_TYPE_UVB); + spin_unlock(&sentry_buf_lock); + } + } + +do_urma_recv: + if (urma_send_success) { + /* In panic status, poll URMA directly */ + recv_msg_nodes = urma_recv(sentry_client_ctx.msg_str, + URMA_SEND_DATA_MAX_LEN); + if (recv_msg_nodes <= 0) + continue; + pr_info("urma received %d nodes\n", recv_msg_nodes); + for (int l = 0; l < recv_msg_nodes; l++) { + struct sentry_msg_helper_msg msg; + uint32_t random_id_stub; + + if (strcmp(HEARTBEAT, sentry_client_ctx.msg_str[l]) == 0 || + strcmp(HEARTBEAT_ACK, sentry_client_ctx.msg_str[l]) == 0) + continue; + + /* Convert and check acknowledgment */ + ret = convert_str_to_smh_msg(sentry_client_ctx.msg_str[l], + &msg, &random_id_stub); + if (ret) { + pr_warn("convert urma data failed: [%s]\n", + sentry_client_ctx.msg_str[l]); + continue; + } + ack_done = get_ack_done(&msg, remote_ack_type, + COMM_TYPE_URMA); + if (ack_done) + break; + } + } + + /* Check if acknowledgment received */ + if (ack_done) { + pr_info("Receive ack message, stop blocking early\n"); + break; + } + + pr_debug("No ACK for %d polling, wait %d ms\n", + i, MILLISECONDS_OF_EACH_MDELAY); + + /* Calculate precise sleep time */ + code_run_count = read_sysreg(cntpct_el0) - current_count; + code_run_times_ms = code_run_count * 1000 / counts_per_sec; + + if (code_run_times_ms < MILLISECONDS_OF_EACH_MDELAY) { + int sleep_time = MILLISECONDS_OF_EACH_MDELAY - code_run_times_ms; + if (sentry_client_ctx.is_in_panic_status) + mdelay(sleep_time); + else + msleep(sleep_time); + } + } + + return 0; +} + +/** + * check_if_eid_cna_is_set - Check if EID and CNA are properly configured + * + * Return: 0 if properly configured, -EINVAL otherwise + * + * This function validates that both CNA and EID are properly set + * before attempting to send remote events. 
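+ *
+ * Both notifier handlers gate on this check together with
+ * check_if_urma_or_uvb_is_ready(); the pattern below mirrors the actual
+ * call sites in panic_handler() and kernel_reboot_handler():
+ *
+ *	if (check_if_eid_cna_is_set() || check_if_urma_or_uvb_is_ready())
+ *		return NOTIFY_OK;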
+ */ +static int check_if_eid_cna_is_set(void) +{ + size_t eid_len = strlen(sentry_client_ctx.eid_raw_str); + + if (g_local_cna > CNA_MAX_VALUE || eid_len == 0) { + pr_err("cna or eid not set, ignore current event\n"); + return -EINVAL; + } + return 0; +} + +/** + * check_if_urma_or_uvb_is_ready - Check if URMA or UVB communication is ready + * + * Return: 0 if at least one communication method is ready, -ENODEV otherwise + * + * This function checks the availability of URMA and UVB communication + * channels and updates the usage flags accordingly. + */ +static int check_if_urma_or_uvb_is_ready(void) +{ + if (sentry_client_ctx.use_urma && !g_is_created_ubcore_resource) { + pr_info("URMA not ready, disable URMA communication\n"); + sentry_client_ctx.use_urma = false; + } + + if (sentry_client_ctx.use_uvb && !(g_server_cna_valid_num > 0)) { + pr_warn("UVB not ready, disable UVB communication\n"); + sentry_client_ctx.use_uvb = false; + } + + if (!(sentry_client_ctx.use_urma || sentry_client_ctx.use_uvb)) { + pr_err("both urma and uvb not connected, ignore current event\n"); + return -ENODEV; + } + + return 0; +} + +/** + * panic_handler - Panic notifier handler + * @nb: Notifier block + * @code: Panic code + * @unused: Unused parameter + * + * Return: NOTIFY_OK + * + * This function handles system panic events by sending panic notifications + * to remote nodes and waiting for acknowledgments. + */ +int panic_handler(struct notifier_block *nb, unsigned long code, void *unused) +{ + if (!sentry_client_ctx.panic_enable) + return NOTIFY_OK; + + sentry_client_ctx.is_in_panic_status = true; + pr_info("Panic handler: received panic message\n"); + + if (check_if_eid_cna_is_set() || check_if_urma_or_uvb_is_ready()) + return NOTIFY_OK; + + pr_info("panic_timeout_ms %lu, cna [%u], eid [%s]\n", + sentry_client_ctx.panic_timeout_ms, g_local_cna, + sentry_client_ctx.eid_raw_str); + + set_urma_panic_mode(true); + remote_event_handler(SMH_MESSAGE_PANIC, sentry_client_ctx.panic_timeout_ms); + pr_info("Panic handler: Blocking finished\n"); + + return NOTIFY_OK; +} + +/** + * kernel_reboot_handler - Kernel reboot notifier handler + * @nb: Notifier block + * @code: Reboot code + * @unused: Unused parameter + * + * Return: NOTIFY_OK + * + * This function handles kernel reboot events by sending reboot notifications + * to remote nodes and waiting for acknowledgments. 
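+ *
+ * Registered through register_reboot_notifier() at INT_MAX priority in
+ * sentry_remote_reporter_init(), so it runs ahead of lower-priority
+ * reboot notifiers.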
+ */ +int kernel_reboot_handler(struct notifier_block *nb, unsigned long code, void *unused) +{ + if (!sentry_client_ctx.kernel_reboot_enable) + return NOTIFY_OK; + + pr_info("kernel reboot handler: received kernel reboot message\n"); + + if (check_if_eid_cna_is_set() || check_if_urma_or_uvb_is_ready()) + return NOTIFY_OK; + + pr_info("kernel_reboot_timeout_ms %lu, cna [%u], eid [%s]\n", + sentry_client_ctx.kernel_reboot_timeout_ms, g_local_cna, + sentry_client_ctx.eid_raw_str); + + set_urma_panic_mode(false); + remote_event_handler(SMH_MESSAGE_KERNEL_REBOOT, + sentry_client_ctx.kernel_reboot_timeout_ms); + pr_info("Kernel reboot handler: Blocking finished\n"); + + /* Stop URMA thread proactively */ + sentry_panic_reporter_exit(); + return NOTIFY_OK; +} + +/** + * proc_panic_reporter_enable_file_show - Show panic reporter enable status + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_panic_reporter_enable_file_show(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + const char *status = sentry_client_ctx.panic_enable ? "on" : "off"; + size_t len = sentry_client_ctx.panic_enable ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +/** + * proc_kernel_reboot_reporter_enable_file_show - Show kernel reboot reporter enable status + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_kernel_reboot_reporter_enable_file_show(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + const char *status = sentry_client_ctx.kernel_reboot_enable ? "on" : "off"; + size_t len = sentry_client_ctx.kernel_reboot_enable ? 
2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +/** + * proc_reporter_cna_show - Show local CNA value + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_reporter_cna_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char cna_str[INTEGER_TO_STR_MAX_LEN]; + + snprintf(cna_str, sizeof(cna_str), "%u\n", g_local_cna); + return simple_read_from_buffer(buf, count, ppos, cna_str, strlen(cna_str)); +} + +/** + * proc_reporter_eid_show - Show local EID value + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_reporter_eid_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, + sentry_client_ctx.eid_raw_str, + strlen(sentry_client_ctx.eid_raw_str)); +} + +/** + * proc_panic_enable_file_write - Write handler for panic enable control + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_panic_enable_file_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for panic mode, only 'off' or 'on' allowed\n"); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set panic mode failed\n"); + return -EFAULT; + } + + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + if (!crash_kexec_post_notifiers) { + pr_warn("crash_kexec_post_notifiers disabled, cannot enable panic event\n"); + return -EPERM; + } + sentry_client_ctx.panic_enable = true; + } else if (strcmp(enable_str, "off") == 0) { + sentry_client_ctx.panic_enable = false; + } else { + pr_err("invalid value for panic mode\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_kernel_reboot_enable_file_write - Write handler for kernel reboot enable control + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_kernel_reboot_enable_file_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for kernel_reboot mode, only 'off' or 'on' allowed\n"); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set kernel_reboot mode failed\n"); + return -EFAULT; + } + + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + sentry_client_ctx.kernel_reboot_enable = true; + } else if (strcmp(enable_str, "off") == 0) { + sentry_client_ctx.kernel_reboot_enable = false; + } else { + pr_err("invalid value for kernel_reboot mode\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_uvb_comm_file_show - Show UVB communication enable status + * @file: proc 
file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_uvb_comm_file_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + const char *status = sentry_client_ctx.use_uvb ? "on" : "off"; + size_t len = sentry_client_ctx.use_uvb ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +/** + * proc_urma_comm_file_show - Show URMA communication enable status + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_urma_comm_file_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + const char *status = sentry_client_ctx.use_urma ? "on" : "off"; + size_t len = sentry_client_ctx.use_urma ? 2 : 3; + + return simple_read_from_buffer(buf, count, ppos, status, len); +} + +/** + * proc_uvb_comm_file_write - Write handler for UVB communication control + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_uvb_comm_file_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for uvb_comm, only 'off' or 'on' allowed\n"); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set uvb_comm failed\n"); + return -EFAULT; + } + + /* Remove trailing newline if present */ + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + sentry_client_ctx.use_uvb = true; + } else if (strcmp(enable_str, "off") == 0) { + if (!sentry_client_ctx.use_urma) { + pr_err("Cannot disable both URMA and UVB comm modes\n"); + return -EINVAL; + } + sentry_client_ctx.use_uvb = false; + } else { + pr_err("invalid value for uvb_comm\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_urma_comm_file_write - Write handler for URMA communication control + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_urma_comm_file_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + char enable_str[ENABLE_VALUE_MAX_LEN + 1] = {0}; + + if (cnt > ENABLE_VALUE_MAX_LEN) { + pr_err("invalid value for urma_comm, only 'off' or 'on' allowed\n"); + return -EINVAL; + } + + ret = copy_from_user(enable_str, ubuf, cnt); + if (ret) { + pr_err("set urma_comm failed\n"); + return -EFAULT; + } + + if (cnt > 0 && enable_str[cnt - 1] == '\n') + enable_str[cnt - 1] = '\0'; + + if (strcmp(enable_str, "on") == 0) { + sentry_client_ctx.use_urma = true; + } else if (strcmp(enable_str, "off") == 0) { + if (!sentry_client_ctx.use_uvb) { + pr_err("Cannot disable both URMA and UVB comm modes\n"); + return -EINVAL; + } + sentry_client_ctx.use_urma = false; + } else { + pr_err("invalid value for urma_comm\n"); + return -EINVAL; + } + + return cnt; +} + +/** + * proc_panic_timeout_show - Show panic timeout value + * @file: proc file pointer + * @buf: user space 
buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_panic_timeout_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char timeout_str[INTEGER_TO_STR_MAX_LEN]; + + snprintf(timeout_str, sizeof(timeout_str), "%ld\n", + sentry_client_ctx.panic_timeout_ms); + return simple_read_from_buffer(buf, count, ppos, timeout_str, strlen(timeout_str)); +} + +/** + * proc_kernel_reboot_timeout_show - Show kernel reboot timeout value + * @file: proc file pointer + * @buf: user space buffer + * @count: number of bytes to read + * @ppos: file position + * + * Return: number of bytes read on success, negative error code on failure + */ +static ssize_t proc_kernel_reboot_timeout_show(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char timeout_str[INTEGER_TO_STR_MAX_LEN]; + + snprintf(timeout_str, sizeof(timeout_str), "%ld\n", + sentry_client_ctx.kernel_reboot_timeout_ms); + return simple_read_from_buffer(buf, count, ppos, timeout_str, strlen(timeout_str)); +} + +/** + * proc_reporter_cna_write - Write handler for CNA configuration + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_reporter_cna_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + uint32_t val; + + ret = kstrtou32_from_user(ubuf, cnt, 10, &val); + if (ret) { + pr_err("parse input parameter for cna failed\n"); + return ret; + } + + if (val > CNA_MAX_VALUE) { + pr_err("set cna failed, max value is %u\n", CNA_MAX_VALUE); + return -EINVAL; + } + + if (sentry_client_ctx.is_uvb_cis_func_registered) { + /* Repeated registration will fail, unregister first */ + unregister_local_cis_func(UBIOS_CALL_ID_PANIC_CALL, UBIOS_USER_ID_UB_DEVICE); + } + + ret = register_local_cis_func(UBIOS_CALL_ID_PANIC_CALL, UBIOS_USER_ID_UB_DEVICE, + cis_ubios_remote_msg_cb); + if (ret) { + pr_err("uvb register function failed\n"); + return ret; + } + + sentry_client_ctx.is_uvb_cis_func_registered = true; + g_local_cna = val; + return cnt; +} + +/** + * proc_reporter_eid_write - Write handler for EID configuration + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_reporter_eid_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + int eid_num = 0; + char eid_str_buf[LOCAL_EID_MAX_LEN]; + char eid_str_buf_tmp[LOCAL_EID_MAX_LEN]; + char eid_str_array[MAX_DIE_NUM][EID_MAX_LEN] = {0}; + union ubcore_eid eid_ub_buf[MAX_DIE_NUM] = {0}; + + if (cnt > LOCAL_EID_MAX_LEN) { + pr_err("invalid eid info, max len %d, actual %lu\n", + LOCAL_EID_MAX_LEN - 1, cnt); + return -EINVAL; + } + + ret = copy_from_user(eid_str_buf, ubuf, cnt); + if (ret) { + pr_err("set eid failed\n"); + return -EFAULT; + } + + if (cnt > 0 && eid_str_buf[cnt - 1] == '\n') + eid_str_buf[cnt - 1] = '\0'; + + if (cnt == LOCAL_EID_MAX_LEN && eid_str_buf[cnt - 1] != '\0') { + pr_err("invalid eid info, max len %d, actual %lu\n", + LOCAL_EID_MAX_LEN - 1, cnt); + return -EINVAL; + } + + memcpy(eid_str_buf_tmp, eid_str_buf, LOCAL_EID_MAX_LEN); + ret = process_multi_eid_string(eid_str_buf_tmp, eid_str_array, eid_ub_buf, + ";", 
MAX_DIE_NUM); + if (ret < 0) + return ret; + + eid_num = ret; + ret = sentry_create_urma_resource(eid_ub_buf, eid_num); + if (ret) + return ret; + + /* Valid EID, update global EID */ + for (int i = 0; i < eid_num; i++) { + memcpy(&sentry_client_ctx.eid[i], &eid_ub_buf[i], + sizeof(union ubcore_eid)); + snprintf(sentry_client_ctx.eid_str[i], EID_MAX_LEN, "%s", + eid_str_array[i]); + } + + sentry_client_ctx.die_num_configured = eid_num; + memcpy(sentry_client_ctx.eid_raw_str, eid_str_buf, LOCAL_EID_MAX_LEN); + return cnt; +} + +/** + * proc_panic_timeout_write - Write handler for panic timeout configuration + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_panic_timeout_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + unsigned long val; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) { + pr_err("invalid value for panic_timeout\n"); + return ret; + } + + if (val < PANIC_TIMEOUT_MS_MIN || val > PANIC_TIMEOUT_MS_MAX) { + pr_err("panic_timeout range [%d, %d], current %lu\n", + PANIC_TIMEOUT_MS_MIN, PANIC_TIMEOUT_MS_MAX, val); + return -EINVAL; + } + + sentry_client_ctx.panic_timeout_ms = val; + return cnt; +} + +/** + * proc_kernel_reboot_timeout_write - Write handler for kernel reboot timeout configuration + * @file: proc file pointer + * @ubuf: user space buffer + * @cnt: number of bytes to write + * @ppos: file position + * + * Return: number of bytes written on success, negative error code on failure + */ +static ssize_t proc_kernel_reboot_timeout_write(struct file *file, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int ret; + unsigned long val; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) { + pr_err("parse input parameter for kernel_reboot_timeout failed\n"); + return ret; + } + + if (val < KERNEL_REBOOT_TIMEOUT_MS_MIN || val > KERNEL_REBOOT_TIMEOUT_MS_MAX) { + pr_err("kernel_reboot_timeout range [%d, %d], current %lu\n", + KERNEL_REBOOT_TIMEOUT_MS_MIN, KERNEL_REBOOT_TIMEOUT_MS_MAX, val); + return -EINVAL; + } + + sentry_client_ctx.kernel_reboot_timeout_ms = val; + return cnt; +} + +/* Proc file operations structures */ +static const struct proc_ops proc_reporter_cna_file_operations = { + .proc_read = proc_reporter_cna_show, + .proc_write = proc_reporter_cna_write, +}; + +static const struct proc_ops proc_reporter_eid_file_operations = { + .proc_read = proc_reporter_eid_show, + .proc_write = proc_reporter_eid_write, +}; + +static const struct proc_ops proc_panic_enable_file_operations = { + .proc_read = proc_panic_reporter_enable_file_show, + .proc_write = proc_panic_enable_file_write, +}; + +static const struct proc_ops proc_kernel_reboot_enable_file_operations = { + .proc_read = proc_kernel_reboot_reporter_enable_file_show, + .proc_write = proc_kernel_reboot_enable_file_write, +}; + +static const struct proc_ops proc_uvb_comm_file_operations = { + .proc_read = proc_uvb_comm_file_show, + .proc_write = proc_uvb_comm_file_write, +}; + +static const struct proc_ops proc_urma_comm_file_operations = { + .proc_read = proc_urma_comm_file_show, + .proc_write = proc_urma_comm_file_write, +}; + +static const struct proc_ops proc_panic_timeout_file_operations = { + .proc_read = proc_panic_timeout_show, + .proc_write = proc_panic_timeout_write, +}; + +static const struct proc_ops proc_kernel_reboot_timeout_file_operations = { + 
.proc_read = proc_kernel_reboot_timeout_show, + .proc_write = proc_kernel_reboot_timeout_write, +}; + +/** + * init_sentry_remote_reporter_proc - Initialize proc filesystem entries + * + * Return: 0 on success, negative error code on failure + * + * This function creates all proc filesystem entries for the remote reporter + * module, allowing user-space configuration of various parameters. + */ +static int init_sentry_remote_reporter_proc(void) +{ + int ret = 0; + + sentry_client_ctx.panic_proc_dir = proc_mkdir_mode("sentry_remote_reporter", + PROC_DIR_PERMISSION, NULL); + if (!sentry_client_ctx.panic_proc_dir) { + pr_err("create /proc/sentry_remote_reporter dir failed\n"); + return -ENOMEM; + } + + ret |= sentry_create_proc_file("cna", sentry_client_ctx.panic_proc_dir, + &proc_reporter_cna_file_operations); + ret |= sentry_create_proc_file("eid", sentry_client_ctx.panic_proc_dir, + &proc_reporter_eid_file_operations); + ret |= sentry_create_proc_file("panic_timeout", sentry_client_ctx.panic_proc_dir, + &proc_panic_timeout_file_operations); + ret |= sentry_create_proc_file("kernel_reboot_timeout", + sentry_client_ctx.panic_proc_dir, + &proc_kernel_reboot_timeout_file_operations); + ret |= sentry_create_proc_file("panic", sentry_client_ctx.panic_proc_dir, + &proc_panic_enable_file_operations); + ret |= sentry_create_proc_file("kernel_reboot", sentry_client_ctx.panic_proc_dir, + &proc_kernel_reboot_enable_file_operations); + ret |= sentry_create_proc_file("uvb_comm", sentry_client_ctx.panic_proc_dir, + &proc_uvb_comm_file_operations); + ret |= sentry_create_proc_file("urma_comm", sentry_client_ctx.panic_proc_dir, + &proc_urma_comm_file_operations); + if (ret < 0) + proc_remove(sentry_client_ctx.panic_proc_dir); + + return ret; +} + +/* Notifier blocks for system events */ +static struct notifier_block panic_notifier = { + .notifier_call = panic_handler, + .priority = INT_MAX, +}; + +static struct notifier_block kernel_reboot_notifier = { + .notifier_call = kernel_reboot_handler, + .priority = INT_MAX, +}; + +/** + * sentry_remote_reporter_init - Module initialization function + * + * Return: 0 on success, negative error code on failure + * + * This function initializes the remote reporter module, including: + * - Generating random ID + * - Initializing panic reporter + * - Allocating message buffers + * - Registering system notifiers + * - Creating proc filesystem entries + */ +static int __init sentry_remote_reporter_init(void) +{ + int ret; + int i; + + sentry_client_ctx.random_id = get_random_u32(); + + ret = sentry_panic_reporter_init(); + if (ret) + return ret; + + sentry_client_ctx.msg_str = kzalloc(MAX_NODE_NUM * 2 * sizeof(char *), GFP_KERNEL); + if (!sentry_client_ctx.msg_str) { + pr_err("Failed to allocate memory for msg_str\n"); + ret = -ENOMEM; + goto stop_kthread; + } + + for (i = 0; i < MAX_NODE_NUM * 2; i++) { + sentry_client_ctx.msg_str[i] = kzalloc(URMA_SEND_DATA_MAX_LEN, GFP_KERNEL); + if (!sentry_client_ctx.msg_str[i]) { + pr_err("Failed to allocate memory for msg_str[%d]\n", i); + free_char_array(sentry_client_ctx.msg_str, i); + ret = -ENOMEM; + goto stop_kthread; + } + } + + ret = register_reboot_notifier(&kernel_reboot_notifier); + if (ret) { + pr_err("Failed to register kernel reboot handler: %d\n", ret); + goto free_msg_str; + } + pr_info("Kernel reboot handler registered\n"); + + ret = atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier); + if (ret) { + pr_err("Failed to register panic handler: %d\n", ret); + goto unregister_kernel_reboot; + } + + 
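+	/*
+	 * Create the /proc/sentry_remote_reporter entries (cna, eid,
+	 * panic_timeout, kernel_reboot_timeout, panic, kernel_reboot,
+	 * uvb_comm, urma_comm) last, after both notifiers are registered,
+	 * so that writes to these knobs always find live handlers.
+	 */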
ret = init_sentry_remote_reporter_proc(); + if (ret) { + pr_err("Failed to create sentry_remote_reporter proc: %d\n", ret); + goto unregister_panic; + } + + pr_info("Panic handler registered\n"); + return 0; + +unregister_panic: + atomic_notifier_chain_unregister(&panic_notifier_list, &panic_notifier); +unregister_kernel_reboot: + unregister_reboot_notifier(&kernel_reboot_notifier); +free_msg_str: + free_char_array(sentry_client_ctx.msg_str, MAX_NODE_NUM * 2); +stop_kthread: + sentry_panic_reporter_exit(); + return ret; +} + +/** + * sentry_remote_reporter_exit - Module cleanup function + * + * This function cleans up all resources allocated by the remote reporter module, + * including unregistering notifiers, freeing memory, and removing proc entries. + */ +static void __exit sentry_remote_reporter_exit(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, &panic_notifier); + pr_info("Panic handler unregistered\n"); + + unregister_reboot_notifier(&kernel_reboot_notifier); + pr_info("Kernel reboot handler unregistered\n"); + + free_char_array(sentry_client_ctx.msg_str, MAX_NODE_NUM * 2); + + if (sentry_client_ctx.panic_proc_dir) + proc_remove(sentry_client_ctx.panic_proc_dir); + + sentry_panic_reporter_exit(); + + if (sentry_client_ctx.is_uvb_cis_func_registered) { + unregister_local_cis_func(UBIOS_CALL_ID_PANIC_CALL, UBIOS_USER_ID_UB_DEVICE); + pr_info("UVB CIS function unregistered\n"); + } +} + +module_init(sentry_remote_reporter_init); +module_exit(sentry_remote_reporter_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("sxt1001"); +MODULE_DESCRIPTION("sentry_remote_reporter module"); +MODULE_VERSION("1.0"); diff --git a/drivers/ub/sentry/sentry_remote_reporter.h b/drivers/ub/sentry/sentry_remote_reporter.h new file mode 100644 index 000000000000..5120e9512567 --- /dev/null +++ b/drivers/ub/sentry/sentry_remote_reporter.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ * Description: Header File for sentry module + * Author: sxt1001 + * Create: 2025-03-18 + */ + +#ifndef SENTRY_REMOTE_REPORTER_H +#define SENTRY_REMOTE_REPORTER_H + +#include +#include +#include +#include +#include + +#include "smh_common_type.h" + +extern void set_urma_panic_mode(bool is_panic); + +// sentry uvb global variable +extern uint32_t g_server_cna_array[MAX_NODE_NUM]; +extern int g_server_cna_valid_num; +extern int cis_ubios_remote_msg_cb(struct cis_message *cis_msg); + +// sentry urma global variable and functions +extern bool g_is_created_ubcore_resource; +extern int str_to_eid(const char *buf, union ubcore_eid *eid); +extern int match_index_by_remote_ub_eid(union ubcore_eid remote_id, int *node_index, int *die_index); +extern int sentry_create_urma_resource(union ubcore_eid eid[], int eid_num); +extern int process_multi_eid_string(char *eid_buf, char eid_array[][EID_MAX_LEN], + union ubcore_eid eid_tmp[], const char *sepstr, int eid_max_num); + +enum SENTRY_REMOTE_COMM_TYPE { + COMM_TYPE_URMA, + COMM_TYPE_UVB, + COMM_TYPE_UNKNOWN +}; + +struct child_thread_process_data { + struct sentry_msg_helper_msg *msg; + enum SENTRY_REMOTE_COMM_TYPE comm_type; + uint32_t random_id; +}; + +struct node_msg_info { + uint32_t random_id; + uint64_t start_send_time; + uint64_t msgid; +}; + +struct sentry_remote_context { + struct node_msg_info node_msg_info_list[MAX_NODE_NUM]; + struct sentry_msg_helper_msg remote_event_ack_msg_buf; + atomic_t remote_event_ack_received; + atomic_t remote_event_ack_done; + struct task_struct *urma_receiver_thread; +}; + +extern spinlock_t sentry_buf_lock; +extern struct sentry_remote_context sentry_remote_ctx; + +int sentry_panic_reporter_init(void); +void sentry_panic_reporter_exit(void); + +int send_msg_to_userspace_and_ack(struct sentry_msg_helper_msg *msg, enum SENTRY_REMOTE_COMM_TYPE comm_type, + uint32_t random_id, enum sentry_msg_helper_msg_type ack_type); + +void write_ack_msg_buf(const struct sentry_msg_helper_msg *msg, enum SENTRY_REMOTE_COMM_TYPE comm_type); +int create_kthread_to_process_msg(const char *event_msg, enum SENTRY_REMOTE_COMM_TYPE comm_type); +enum sentry_msg_helper_msg_type get_ack_type(enum sentry_msg_helper_msg_type event_type); +#endif diff --git a/drivers/ub/sentry/sentry_remote_server.c b/drivers/ub/sentry/sentry_remote_server.c new file mode 100644 index 000000000000..42b24a3a0bd9 --- /dev/null +++ b/drivers/ub/sentry/sentry_remote_server.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * + * Description: Server module, used for reporting panic or reboot msg to the + * userspace and forward ack msg to the client + * Author: sxt1001 + * Create: 2025-03-18 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smh_message.h" +#include "sentry_remote_reporter.h" + +#undef pr_fmt +#define pr_fmt(fmt) "[sentry][remote server]: " fmt + +struct sentry_remote_context sentry_remote_ctx; +DEFINE_SPINLOCK(sentry_buf_lock); + +static DEFINE_MUTEX(sentry_msg_info_mutex); + +/** + * send_msg_to_userspace - Send message to userspace with proper tracking + * @msg: Message to send + * @comm_type: Communication type (URMA or UVB) + * @random_id: Random identifier for message tracking + * + * Return: 0 on success, negative error code on failure + * + * This function sends a message to userspace and tracks it using node message + * info for acknowledgment handling. 
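+ *
+ * Retransmissions are collapsed here: node_msg_info_list[] caches the
+ * (random_id, msgid, start_send_time) triple per node, so a client that
+ * resends the same event (same random_id) is mapped to the same
+ * userspace message id instead of producing a new one.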
+ */ +int send_msg_to_userspace(struct sentry_msg_helper_msg *msg, + enum SENTRY_REMOTE_COMM_TYPE comm_type, uint32_t random_id) +{ + int ret; + int node_idx = -1; + int die_index = -1; + union ubcore_eid dst_ubcore_eid; + + pr_info("send %s message to userspace\n", + comm_type == COMM_TYPE_URMA ? "urma" : "uvb"); + + if (comm_type == COMM_TYPE_URMA) { + if (str_to_eid(msg->helper_msg_info.remote_info.eid, &dst_ubcore_eid) < 0) { + pr_err("send_msg_to_userspace: invalid dst eid [%s]\n", + msg->helper_msg_info.remote_info.eid); + return -EINVAL; + } + match_index_by_remote_ub_eid(dst_ubcore_eid, &node_idx, &die_index); + } else if (comm_type == COMM_TYPE_UVB) { + int i; + + for (i = 0; i < g_server_cna_valid_num; i++) { + if (msg->helper_msg_info.remote_info.cna == g_server_cna_array[i]) { + node_idx = i; + break; + } + } + } + + if (node_idx < 0) { + pr_err("Invalid cna: %u or eid: %s of msg, stop to send to userspace\n", + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid); + return -EINVAL; + } + + mutex_lock(&sentry_msg_info_mutex); + if (sentry_remote_ctx.node_msg_info_list[node_idx].random_id != random_id) { + pr_info("Get new message from cna: %u, eid: %s\n", + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid); + sentry_remote_ctx.node_msg_info_list[node_idx].start_send_time = ktime_get_ns(); + sentry_remote_ctx.node_msg_info_list[node_idx].msgid = smh_get_new_msg_id(); + sentry_remote_ctx.node_msg_info_list[node_idx].random_id = random_id; + } + msg->start_send_time = sentry_remote_ctx.node_msg_info_list[node_idx].start_send_time; + msg->msgid = sentry_remote_ctx.node_msg_info_list[node_idx].msgid; + mutex_unlock(&sentry_msg_info_mutex); + + ret = smh_message_send(msg, true); + return ret; +} + +/** + * send_msg_to_userspace_and_ack - Send message to userspace and wait for acknowledgment + * @msg: Message to send + * @comm_type: Communication type (URMA or UVB) + * @random_id: Random identifier for message tracking + * @ack_type: Type of acknowledgment expected + * + * Return: 0 on success, negative error code on failure + * + * This function sends a message to userspace, waits for acknowledgment, and + * sends acknowledgment back to the remote node. + */ +int send_msg_to_userspace_and_ack(struct sentry_msg_helper_msg *msg, + enum SENTRY_REMOTE_COMM_TYPE comm_type, + uint32_t random_id, enum sentry_msg_helper_msg_type ack_type) +{ + int ret; + int times = msg->timeout_time / MILLISECONDS_OF_EACH_MDELAY; + int i, j; + + ret = send_msg_to_userspace(msg, comm_type, random_id); + if (ret) { + pr_err("Failed to send remote message to userspace\n"); + return ret; + } + + /* Wait for acknowledgment from userspace */ + for (i = 0; i < times; i++) { + uint64_t cur_time = ktime_get_ns(); + + ret = smh_message_get_ack(msg); + if (!ret) { + int sleep_time = MILLISECONDS_OF_EACH_MDELAY - + (int)((ktime_get_ns() - cur_time) / NSEC_PER_MSEC); + if (sleep_time > 0) + msleep_interruptible(sleep_time); + continue; + } + + /* Get acknowledgment success, send acknowledgment message */ + char send_ack[URMA_SEND_DATA_MAX_LEN]; + + ret = snprintf(send_ack, URMA_SEND_DATA_MAX_LEN, "%d_%u_%s_%lu", + ack_type, + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid, + msg->res); + if ((size_t)ret >= URMA_SEND_DATA_MAX_LEN) { + pr_err("msg str size exceeds the max value\n"); + return -EINVAL; + } + + pr_info("Start to send %s ack msg to %u\n", + comm_type == COMM_TYPE_URMA ? 
"urma" : "uvb", + msg->helper_msg_info.remote_info.cna); + + if (comm_type == COMM_TYPE_URMA) { + /* Retry URMA acknowledgment sending */ + for (j = 0; j < URMA_ACK_RETRY_NUM; j++) { + ret = urma_send(send_ack, sizeof(send_ack), + msg->helper_msg_info.remote_info.eid, -1); + if (ret == COMM_PARM_NOT_SET) + break; + msleep_interruptible(MILLISECONDS_OF_EACH_MDELAY); + } + } else { + /* UVB is a reliable protocol, no need to resend */ + ret = uvb_send(send_ack, msg->helper_msg_info.remote_info.cna, false); + } + + if (ret <= 0) { + pr_warn("Failed to send %s ack message to client (cna:%u, eid:%s)\n", + comm_type == COMM_TYPE_URMA ? "urma" : "uvb", + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid); + return -EFAULT; + } + return 0; + } + + return -ETIMEDOUT; +} + +/** + * get_ack_type - Get acknowledgment type for given event type + * @event_type: Event type to get acknowledgment for + * + * Return: Corresponding acknowledgment type + */ +enum sentry_msg_helper_msg_type get_ack_type(enum sentry_msg_helper_msg_type event_type) +{ + enum sentry_msg_helper_msg_type ack_type; + + switch (event_type) { + case SMH_MESSAGE_PANIC: + ack_type = SMH_MESSAGE_PANIC_ACK; + break; + case SMH_MESSAGE_KERNEL_REBOOT: + ack_type = SMH_MESSAGE_KERNEL_REBOOT_ACK; + break; + default: + pr_warn("Invalid event type!\n"); + ack_type = SMH_MESSAGE_UNKNOWN; + } + + return ack_type; +} + +/** + * process_remote_event_msg - Process remote event message in kernel thread context + * @data: Pointer to child_thread_process_data structure containing message info + * + * Return: 0 on success, negative error code on failure + */ +static int process_remote_event_msg(void *data) +{ + int ret; + enum sentry_msg_helper_msg_type ack_type; + struct child_thread_process_data *child_data = data; + + try_module_get(THIS_MODULE); + + ack_type = get_ack_type(child_data->msg->type); + if (ack_type == SMH_MESSAGE_UNKNOWN) { + ret = -EINVAL; + goto cleanup_child; + } + + ret = send_msg_to_userspace_and_ack(child_data->msg, child_data->comm_type, + child_data->random_id, ack_type); + +cleanup_child: + kfree(child_data->msg); + kfree(child_data); + module_put(THIS_MODULE); + return ret; +} + +/** + * write_ack_msg_buf - Write acknowledgment message to shared buffer + * @msg: Acknowledgment message + * @comm_type: Communication type (URMA or UVB) + * + * This function writes an acknowledgment message to a shared buffer for + * inter-process communication, ensuring thread-safe access. + */ +void write_ack_msg_buf(const struct sentry_msg_helper_msg *msg, + enum SENTRY_REMOTE_COMM_TYPE comm_type) +{ + if (atomic_inc_return(&sentry_remote_ctx.remote_event_ack_received) == 1) { + pr_info("Receive ack message from %s: [%d_%u_%s_%lu]. Start to update buf\n", + comm_type == COMM_TYPE_URMA ? 
"URMA" : "UVB", + msg->type, + msg->helper_msg_info.remote_info.cna, + msg->helper_msg_info.remote_info.eid, + msg->res); + + spin_lock(&sentry_buf_lock); + memcpy(&sentry_remote_ctx.remote_event_ack_msg_buf, msg, + sizeof(sentry_remote_ctx.remote_event_ack_msg_buf)); + spin_unlock(&sentry_buf_lock); + atomic_set(&sentry_remote_ctx.remote_event_ack_done, 1); + } +} + +/** + * create_kthread_to_process_msg - Create kernel thread to process incoming message + * @event_msg: Raw event message string + * @comm_type: Communication type (URMA or UVB) + * + * Return: 0 on success, negative error code on failure + * + * This function creates a kernel thread to process incoming remote messages, + * handling both panic/reboot events and acknowledgment messages. + */ +int create_kthread_to_process_msg(const char *event_msg, + enum SENTRY_REMOTE_COMM_TYPE comm_type) +{ + int ret; + struct sentry_msg_helper_msg msg; + uint32_t random_id; + struct child_thread_process_data *child_data; + struct task_struct *child_thread; + + ret = convert_str_to_smh_msg(event_msg, &msg, &random_id); + if (ret) { + pr_err("convert %s data to smh msg failed. msg [%s]\n", + comm_type == COMM_TYPE_URMA ? "urma" : "uvb", event_msg); + return -EINVAL; + } + + if (msg.type != SMH_MESSAGE_PANIC && msg.type != SMH_MESSAGE_KERNEL_REBOOT) { + /* Write acknowledgment message to shared memory */ + write_ack_msg_buf(&msg, comm_type); + return 0; + } + + child_data = kzalloc(sizeof(*child_data), GFP_KERNEL); + if (!child_data) { + pr_err("Failed to allocate memory for child_data\n"); + return -ENOMEM; + } + + child_data->msg = kzalloc(sizeof(*child_data->msg), GFP_KERNEL); + if (!child_data->msg) { + kfree(child_data); + pr_err("Failed to allocate memory for child_data->msg\n"); + return -ENOMEM; + } + + /* Update child thread data */ + memcpy(child_data->msg, &msg, sizeof(*child_data->msg)); + child_data->random_id = random_id; + child_data->comm_type = comm_type; + + child_thread = kthread_run(process_remote_event_msg, child_data, + "sentry_msg_thread_%s_%u", + comm_type == COMM_TYPE_URMA ? "urma" : "uvb", + random_id); + if (IS_ERR(child_thread)) { + kfree(child_data->msg); + kfree(child_data); + pr_err("Failed to create child thread\n"); + return PTR_ERR(child_thread); + } + + return 0; +} + +/** + * process_urma_data - Process URMA data in kernel thread + * @data: Thread data (unused) + * + * Return: 0 on success, negative error code on failure + * + * This function runs in a kernel thread to receive and process URMA messages, + * creating separate threads for message processing. 
+ */ +static int process_urma_data(void *data) +{ + int ret = 0; + int recv_msg_nodes = 0; + char **msg_str; + int i; + + msg_str = kcalloc(MAX_NODE_NUM * MAX_DIE_NUM, sizeof(*msg_str), GFP_KERNEL); + if (!msg_str) { + pr_err("Failed to allocate memory for msg_str!\n"); + return -ENOMEM; + } + + for (i = 0; i < MAX_NODE_NUM * MAX_DIE_NUM; i++) { + msg_str[i] = kzalloc(URMA_SEND_DATA_MAX_LEN, GFP_KERNEL); + if (!msg_str[i]) { + pr_err("Failed to allocate memory for msg_str[%d]!\n", i); + ret = -ENOMEM; + goto free_msg; + } + } + + while (!kthread_should_stop()) { + /* Listen for URMA messages */ + recv_msg_nodes = urma_recv(msg_str, URMA_SEND_DATA_MAX_LEN); + if (recv_msg_nodes <= 0) { + /* + * Prevent processes from entering the D state if reboot event + * occurs on the current node + */ + msleep_interruptible(MILLISECONDS_OF_EACH_MDELAY); + continue; + } + + pr_info("urma messages are received, the number of nodes that are successfully received is %d\n", + recv_msg_nodes); + + for (i = 0; i < recv_msg_nodes; i++) { + if (strcmp(HEARTBEAT, msg_str[i]) == 0 || + strcmp(HEARTBEAT_ACK, msg_str[i]) == 0) + continue; + + ret = create_kthread_to_process_msg(msg_str[i], COMM_TYPE_URMA); + if (ret == -ENOMEM) + goto free_msg; + } + + /* + * Prevent processes from entering the D state if reboot event + * occurs on the current node + */ + msleep_interruptible(MILLISECONDS_OF_EACH_MDELAY); + } + +free_msg: + free_char_array(msg_str, MAX_NODE_NUM); + + pr_info("Urma receiver thread stopped!\n"); + return ret; +} + +/** + * cis_ubios_remote_msg_cb - UVB remote message callback + * @cis_msg: CIS message from UVB + * + * Return: 0 on success, negative error code on failure + * + * This function serves as the callback for UVB remote messages, + * processing incoming messages through the appropriate mechanism. 
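+ *
+ * The client side registers this callback once a CNA is configured,
+ * for example:
+ *
+ *	register_local_cis_func(UBIOS_CALL_ID_PANIC_CALL,
+ *				UBIOS_USER_ID_UB_DEVICE,
+ *				cis_ubios_remote_msg_cb);
+ *
+ * (see proc_reporter_cna_write() in sentry_remote_client.c).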
+ */ +int cis_ubios_remote_msg_cb(struct cis_message *cis_msg) +{ + int ret; + + pr_info("uvb get msg: [%s]\n", (char *)cis_msg->input); + ret = create_kthread_to_process_msg((char *)cis_msg->input, COMM_TYPE_UVB); + return ret; +} + +/** + * sentry_panic_reporter_init - Initialize sentry panic reporter module + * + * Return: 0 on success, negative error code on failure + */ +int sentry_panic_reporter_init(void) +{ + atomic_set(&sentry_remote_ctx.remote_event_ack_received, 0); + atomic_set(&sentry_remote_ctx.remote_event_ack_done, 0); + + sentry_remote_ctx.urma_receiver_thread = kthread_run(process_urma_data, NULL, "sentry_urma_kthread"); + if (IS_ERR(sentry_remote_ctx.urma_receiver_thread)) { + pr_err("Failed to create kernel urma receiver thread.\n"); + return PTR_ERR(sentry_remote_ctx.urma_receiver_thread); + } + + pr_info("Create kernel urma receiver thread success.\n"); + return 0; +} + +/** + * sentry_panic_reporter_exit - Cleanup sentry panic reporter module + */ +void sentry_panic_reporter_exit(void) +{ + if (sentry_remote_ctx.urma_receiver_thread) { + kthread_stop(sentry_remote_ctx.urma_receiver_thread); + sentry_remote_ctx.urma_receiver_thread = NULL; + pr_info("Kernel urma receiver thread stopped\n"); + } +} diff --git a/drivers/ub/sentry/smh_common_type.h b/drivers/ub/sentry/smh_common_type.h index fd8ed418928a..2303afc2e529 100644 --- a/drivers/ub/sentry/smh_common_type.h +++ b/drivers/ub/sentry/smh_common_type.h @@ -46,7 +46,11 @@ enum { enum sentry_msg_helper_msg_type { SMH_MESSAGE_POWER_OFF, SMH_MESSAGE_OOM, + SMH_MESSAGE_PANIC, + SMH_MESSAGE_KERNEL_REBOOT, SMH_MESSAGE_UB_MEM_ERR, + SMH_MESSAGE_PANIC_ACK, + SMH_MESSAGE_KERNEL_REBOOT_ACK, SMH_MESSAGE_UNKNOWN, }; @@ -64,6 +68,10 @@ struct sentry_msg_helper_msg { int timeout; int reason; } oom_info; + struct { + uint32_t cna; + char eid[EID_MAX_LEN]; + } remote_info; struct { uint64_t pa; int mem_type; @@ -74,11 +82,99 @@ struct sentry_msg_helper_msg { unsigned long res; }; +// urma communication interface +extern int urma_send(const char *buf, size_t len, const char *dst_eid, int die_index); +extern int urma_recv(char **buf_arr, size_t len); + +// UVB communication interface +extern int uvb_send(const char *str, uint32_t dst_cna, bool is_sync); + extern uint32_t g_local_cna; #define UVB_SENDER_ID_SYSSENTRY_INDEX (g_local_cna) #define UVB_SENDER_ID_SYSSENTRY (UBIOS_USER_ID_RICH_OS | UVB_SENDER_ID_SYSSENTRY_INDEX) #define UVB_RECEIVER_ID_SYSSENTRY(cna) (UBIOS_USER_ID_UB_DEVICE | (cna)) +/* + * str format type_cna_eid or type_cna_eid_res. type_cna_eid_res is ack msg. + * */ +static inline int convert_str_to_smh_msg(const char *str, + struct sentry_msg_helper_msg *smh_msg, + uint32_t *random_id) +{ + int n; + int ret = 0; + char input_copy[URMA_SEND_DATA_MAX_LEN]; + + n = sscanf(str, "%d_%s", (int *)&smh_msg->type, input_copy); + if (n != 2) { + pr_warn("Invalid msg str format and parse type failed! str is [%s].\n", str); + return -EINVAL; + } + + switch (smh_msg->type) { + case SMH_MESSAGE_PANIC: + case SMH_MESSAGE_KERNEL_REBOOT: + // eid length is EID_MAX_LEN - 1 + if (!(sscanf(input_copy, "%u_%39[^_]_%llu_%u%n", + &smh_msg->helper_msg_info.remote_info.cna, + smh_msg->helper_msg_info.remote_info.eid, + &smh_msg->timeout_time, + random_id, + &n) == 4) || strlen(input_copy) != n) { + pr_warn("Invalid msg str format and parse cna/eid failed! 
str is [%s].\n", str);
+			ret = -1;
+		}
+		break;
+	case SMH_MESSAGE_PANIC_ACK:
+	case SMH_MESSAGE_KERNEL_REBOOT_ACK:
+		if (!(sscanf(input_copy, "%u_%39[^_]_%lu%n",
+			&smh_msg->helper_msg_info.remote_info.cna,
+			smh_msg->helper_msg_info.remote_info.eid,
+			&smh_msg->res,
+			&n) == 3) || strlen(input_copy) != n) {
+			pr_warn("Invalid msg str format and parse cna/eid failed! str is [%s].\n", str);
+			ret = -1;
+		}
+		break;
+	default:
+		pr_warn("Invalid event type!\n");
+		ret = -1;
+	}
+	return ret;
+}
+
+static inline void free_char_array(char **array_ptr, int array_len)
+{
+	if (array_ptr) {
+		for (int i = 0; i < array_len; i++) {
+			if (array_ptr[i]) {
+				kfree(array_ptr[i]);
+				array_ptr[i] = NULL;
+			}
+		}
+		kfree(array_ptr);
+		array_ptr = NULL;
+	}
+}
+
+/*
+ * Return 1 when buf is valid ipv4 format, return 0 when buf is invalid ipv4
+ * format or any error occurs.
+ */
+static inline int is_valid_ipv4(const char *buf)
+{
+	int ret;
+	__be32 addr;
+
+	if (buf == NULL) {
+		return 0;
+	}
+
+	ret = in4_pton(buf, strnlen(buf, EID_MAX_LEN), (u8 *)&addr, '\0', NULL);
+	return ret;
+}
+
 static inline int sentry_create_proc_file(const char *name,
 		struct proc_dir_entry *parent, const struct proc_ops *proc_ops)
 {
-- 
Gitee

From d71435cbf530d61bb95313471b814452028fdcdd Mon Sep 17 00:00:00 2001
From: shixuantong
Date: Mon, 8 Dec 2025 19:30:33 +0800
Subject: [PATCH 29/48] add check ack_done flag for reboot event

commit 34105c956636bc33fc00bd4c005510cd5c3ca72d openEuler

As soon as an ACK msg is received, the loop should be exited.

Signed-off-by: guodashun
Signed-off-by: shixuantong
Signed-off-by: shixuantong
---
 drivers/ub/sentry/sentry_remote_client.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/ub/sentry/sentry_remote_client.c b/drivers/ub/sentry/sentry_remote_client.c
index da4b46f12afe..e15b41d133cb 100644
--- a/drivers/ub/sentry/sentry_remote_client.c
+++ b/drivers/ub/sentry/sentry_remote_client.c
@@ -229,7 +229,10 @@ int remote_event_handler(enum sentry_msg_helper_msg_type remote_type,
 			ack_done = get_ack_done(&sentry_remote_ctx.remote_event_ack_msg_buf,
 						remote_ack_type, COMM_TYPE_UNKNOWN);
 			spin_unlock(&sentry_buf_lock);
-			continue;
+			if (ack_done) {
+				pr_info("Receive ack message, stop blocking early\n");
+				break;
+			}
 		}
 		/* Handle acknowledgment in panic mode */
 		if (uvb_send_success) {
-- 
Gitee

From 930562f76a9ce299b4a456849219a87a04435467 Mon Sep 17 00:00:00 2001
From: Wang Xin
Date: Mon, 1 Dec 2025 17:11:06 +0800
Subject: [PATCH 30/48] obmm: set up default mem allocator granule

commit ea4c8becb21ef7761cb9948ac5a6a5e748deef98 openEuler

This commit fixes the default mem allocator granule to make sure that
when mem_allocator_granu is not specified, the allocator uses the
correct default granule, aligning with its expected behavior.

Fixes: 84895087fb79 ("obmm: Implement memory pool allocator")
Signed-off-by: Wang Xin
Signed-off-by: Li Ruilin
Signed-off-by: yuhao_zhang
Signed-off-by: Wang Xin <2913220561@qq.com>
---
 drivers/ub/obmm/ubmempool_allocator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ub/obmm/ubmempool_allocator.c b/drivers/ub/obmm/ubmempool_allocator.c
index a0a3e7060edd..1be5a5534de1 100644
--- a/drivers/ub/obmm/ubmempool_allocator.c
+++ b/drivers/ub/obmm/ubmempool_allocator.c
@@ -424,7 +424,7 @@ static void mem_allocator_uninit_one(int nid)
 static char *mempool_allocator;
 module_param(mempool_allocator, charp, 0440);
 MODULE_PARM_DESC(mempool_allocator,
-	"OBMM mempool allocator. 
If not set, use kernel cmdline pmd_mapping to select."); + "OBMM mempool allocator. If not set, use buddy_highmem allocator."); static const char * const allocator_names[] = { "hugetlb_pmd", "hugetlb_pud", @@ -479,7 +479,7 @@ static int init_mem_allocator_granu(enum allocator_id aid) if (!mem_allocator_granu) { if (aid == ALLOCATOR_HUGETLB_PUD) __obmm_memseg_size = PUD_SIZE; - else if (aid == ALLOCATOR_HUGETLB_PMD) + else __obmm_memseg_size = PMD_SIZE; return 0; } -- Gitee From 854bc93f14fcf9e88d2a8445469019857af8be13 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Tue, 2 Dec 2025 20:28:18 +0800 Subject: [PATCH 31/48] obmm: remove redundant code and fix log issues commit 39b9cc8398639540e2c5dba6e62d808695de6d91 openEuler This commit fixes several issues with log printing and redundant code. Fixes: 84895087fb79 ("obmm: Implement memory pool allocator") Signed-off-by: Li Ruilin Signed-off-by: yuhao_zhang Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/conti_mem_allocator.c | 6 +++--- drivers/ub/obmm/obmm_core.c | 24 ++++++++++++------------ drivers/ub/obmm/obmm_export.c | 2 +- drivers/ub/obmm/obmm_export_from_pool.c | 2 +- drivers/ub/obmm/obmm_preimport.c | 2 +- drivers/ub/obmm/obmm_shm_dev.c | 9 +++------ 6 files changed, 21 insertions(+), 24 deletions(-) diff --git a/drivers/ub/obmm/conti_mem_allocator.c b/drivers/ub/obmm/conti_mem_allocator.c index 5b3cfe1d1e90..3ed9445daca6 100644 --- a/drivers/ub/obmm/conti_mem_allocator.c +++ b/drivers/ub/obmm/conti_mem_allocator.c @@ -258,8 +258,8 @@ size_t conti_alloc_memory(struct conti_mem_allocator *allocator, size_t size, spin_lock_irqsave(&allocator->lock, flags); available = conti_get_avail(allocator); if (!allow_slow && available < size) { - pr_err("%s:fast alloc failed. nid: %d, request: 0x%lx, available: 0x%lx", __func__, - allocator->nid, size, available); + pr_err("%s:fast alloc failed. 
nid: %d, request: 0x%lx, available: 0x%lx\n", + __func__, allocator->nid, size, available); spin_unlock_irqrestore(&allocator->lock, flags); goto out_continue_pool; } @@ -487,7 +487,7 @@ int conti_mem_allocator_init(struct conti_mem_allocator *allocator, int nid, siz int ret; if (!allocator || !ops) { - pr_err("%s: null pointer.", __func__); + pr_err("%s: null pointer.\n", __func__); return -EINVAL; } if (!ops->need_expand || !ops->expand_size) { diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index ded614003595..c01ac152e5ad 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -247,7 +247,7 @@ bool validate_scna(u32 scna) int ret = ub_mem_get_numa_id(scna); if (ret < 0) { - pr_err("%#x is not a known scna, lookup ret=%pe", scna, ERR_PTR(ret)); + pr_err("%#x is not a known scna, lookup ret=%pe\n", scna, ERR_PTR(ret)); return false; } return true; @@ -432,7 +432,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_from_user(&cmd_param.create, (void __user *)arg, sizeof(struct obmm_cmd_export)); if (ret) { - pr_err("failed to load export argument"); + pr_err("failed to load export argument\n"); return -EFAULT; } @@ -443,7 +443,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_to_user((void __user *)arg, &cmd_param.create, sizeof(struct obmm_cmd_export)); if (ret) { - pr_err("failed to write export result"); + pr_err("failed to write export result\n"); return -EFAULT; } } break; @@ -451,7 +451,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_from_user(&cmd_param.import, (void __user *)arg, sizeof(struct obmm_cmd_import)); if (ret) { - pr_err("failed to load import argument"); + pr_err("failed to load import argument\n"); return -EFAULT; } @@ -462,7 +462,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_to_user((void __user *)arg, &cmd_param.import, sizeof(struct obmm_cmd_import)); if (ret) { - pr_err("failed to write import result"); + pr_err("failed to write import result\n"); return -EFAULT; } } break; @@ -470,7 +470,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_from_user(&cmd_param.unexport, (void __user *)arg, sizeof(struct obmm_cmd_unexport)); if (ret) { - pr_err("failed to load unexport argument"); + pr_err("failed to load unexport argument\n"); return -EFAULT; } @@ -480,7 +480,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_from_user(&cmd_param.unimport, (void __user *)arg, sizeof(struct obmm_cmd_unimport)); if (ret) { - pr_err("failed to load unimport argument"); + pr_err("failed to load unimport argument\n"); return -EFAULT; } @@ -490,7 +490,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_from_user(&cmd_param.query, (void __user *)arg, sizeof(struct obmm_cmd_addr_query)); if (ret) { - pr_err("failed to load addr_query argument"); + pr_err("failed to load addr_query argument\n"); return -EFAULT; } @@ -501,7 +501,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_to_user((void __user *)arg, &cmd_param.query, sizeof(struct obmm_cmd_addr_query)); if (ret) { - pr_err("failed to write obmm_query result"); + pr_err("failed to write obmm_query result\n"); return -EFAULT; } } break; @@ -528,7 +528,7 @@ static long obmm_dev_ioctl(struct file 
*file __always_unused, unsigned int cmd, ret = (int)copy_from_user(&cmd_param.preimport, (void __user *)arg, sizeof(struct obmm_cmd_preimport)); if (ret) { - pr_err("failed to load preimport argument"); + pr_err("failed to load preimport argument\n"); return -EFAULT; } @@ -539,7 +539,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_to_user((void __user *)arg, &cmd_param.preimport, sizeof(struct obmm_cmd_preimport)); if (ret) { - pr_err("failed to write preimport result"); + pr_err("failed to write preimport result\n"); return -EFAULT; } } break; @@ -547,7 +547,7 @@ static long obmm_dev_ioctl(struct file *file __always_unused, unsigned int cmd, ret = (int)copy_from_user(&cmd_param.preimport, (void __user *)arg, sizeof(struct obmm_cmd_preimport)); if (ret) { - pr_err("failed to load preimport argument"); + pr_err("failed to load preimport argument\n"); return -EFAULT; } diff --git a/drivers/ub/obmm/obmm_export.c b/drivers/ub/obmm/obmm_export.c index 50f21dde5f62..e1ec90cf15dd 100644 --- a/drivers/ub/obmm/obmm_export.c +++ b/drivers/ub/obmm/obmm_export.c @@ -246,7 +246,7 @@ int set_export_vendor(struct obmm_export_region *e_reg, const void __user *vendo return 0; } if (vendor_len > OBMM_MAX_VENDOR_LEN) { - pr_err("invalid vendor_len = 0x%x, should less than 0x%x", vendor_len, + pr_err("invalid vendor_len = 0x%x, should less than 0x%x\n", vendor_len, OBMM_MAX_VENDOR_LEN); return -EINVAL; } diff --git a/drivers/ub/obmm/obmm_export_from_pool.c b/drivers/ub/obmm/obmm_export_from_pool.c index d2b091bc0724..dabf5373ea2c 100644 --- a/drivers/ub/obmm/obmm_export_from_pool.c +++ b/drivers/ub/obmm/obmm_export_from_pool.c @@ -234,7 +234,7 @@ static int calculate_export_region_size(unsigned long *total_size, } node_set(cmd_export->pxm_numa, nodes); if (!nodes_on_same_package(&nodes)) { - pr_err("Cannot use memory from multiple sockets.\n"); + pr_err("Cannot use memory from multiple sockets or memory and ub controller is from different sockets.\n"); return -EINVAL; } diff --git a/drivers/ub/obmm/obmm_preimport.c b/drivers/ub/obmm/obmm_preimport.c index 76d63159af19..96daafbadcec 100644 --- a/drivers/ub/obmm/obmm_preimport.c +++ b/drivers/ub/obmm/obmm_preimport.c @@ -233,7 +233,7 @@ int obmm_unpreimport(struct obmm_cmd_preimport *cmd) ret = preimport_release_prefilled(cmd->pa, cmd->pa + cmd->length - 1); if (ret == 0) module_put(THIS_MODULE); - pr_info("%s: unpreimport on nid=%d finished.\n", __func__, cmd->numa_id); + pr_info("%s: unpreimport on pa=%#llx finished.\n", __func__, cmd->pa); return ret; } diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c index f651399e712d..751abee40a02 100644 --- a/drivers/ub/obmm/obmm_shm_dev.c +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -304,12 +304,12 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) print_mmap_param(file, vma); if (!region_allow_mmap(reg)) { - pr_err("mmap region %d: not allow to be mmaped", reg->regionid); + pr_err("mmap region %d: not allow to be mmaped\n", reg->regionid); return -EPERM; } if (!validate_perm(file, vma->vm_flags)) { - pr_err("mmap region %d: invalid vma permission", reg->regionid); + pr_err("mmap region %d: invalid vma permission\n", reg->regionid); return -EPERM; } @@ -318,7 +318,7 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) offset = vma->vm_pgoff << PAGE_SHIFT; if (offset & OBMM_MMAP_FLAG_HUGETLB_PMD) { - pr_debug("trying hugepage mmap"); + pr_debug("trying hugepage mmap\n"); mmap_granu = 
OBMM_MMAP_GRANU_PMD; offset &= ~OBMM_MMAP_FLAG_HUGETLB_PMD; } else { @@ -874,9 +874,6 @@ const struct file_operations obmm_shm_fops = { .owner = THIS_MODULE, static void obmm_shm_dev_release(struct device *dev) { - struct obmm_region *reg; - - reg = container_of(dev, struct obmm_region, device); module_put(THIS_MODULE); } -- Gitee From 8e6569c9fa8bf9659d84834e3e9a6e58f61ea8ce Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Mon, 8 Dec 2025 22:14:49 +0800 Subject: [PATCH 32/48] obmm: fix granu checking in mmap commit 8b73b2ba52e6cecaf22cb928cbb96abfe313bced openEuler This patch removes incorrect granu checking code, which may lead to a kernel panic or a memory leak. Fixes: 49ddfaab9aa3 ("obmm: Add mmap support for shared memory regions") Signed-off-by: yuhao_zhang Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/obmm_shm_dev.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c index 751abee40a02..05de51ee167b 100644 --- a/drivers/ub/obmm/obmm_shm_dev.c +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -391,17 +391,6 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) if (ret) goto err_release_local_state_info; } - /* - * initialize region-level ownership info if not done yet. - * once initialized, the OBMM ownership will persist until - * the memdev goes offline. - */ - ret = init_ownership_info(reg); - if (ret) - goto err_release_local_state_info; - ret = check_mmap_allowed(reg, vma, mem_state); - if (ret) - goto err_release_local_state_info; ret = map_obmm_region(vma, reg, mmap_granu); if (ret) { -- Gitee From bd2d3c81b2d609217de12d7bd5a58a6fe213e808 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Mon, 8 Dec 2025 21:41:19 +0800 Subject: [PATCH 33/48] obmm: set MAX Numa Distance to 255 commit 83ae17336e75ca20f3819afd45e2067346ca6f77 openEuler We used to treat 255 as a reserved value for numa_distance. Now we have confirmed that 255 indicates an unreachable distance. We adhere to these semantics and set this value as the maximum distance. Fixes: 8a5e2c9e2b16 ("obmm: Add memory region import functionality") Signed-off-by: yuhao_zhang Signed-off-by: Wang Xin <2913220561@qq.com> --- include/uapi/ub/obmm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/ub/obmm.h b/include/uapi/ub/obmm.h index 7c2b29e3df73..a958092e5687 100644 --- a/include/uapi/ub/obmm.h +++ b/include/uapi/ub/obmm.h @@ -14,7 +14,7 @@ extern "C" { #define OBMM_MAX_LOCAL_NUMA_NODES 16 -#define MAX_NUMA_DIST 254 +#define MAX_NUMA_DIST 255 #define OBMM_MAX_PRIV_LEN 512 #define OBMM_MAX_VENDOR_LEN 128 -- Gitee From 65da2f2a0cf9706df341b4548e0ed5072d004dc0 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Mon, 8 Dec 2025 21:17:52 +0800 Subject: [PATCH 34/48] obmm: sync obmm default param values to module sysfs commit 0fdd1df3b799e4ab8a7a9d3cf10d1b755255a4a2 openEuler To provide a friendly memory pool query interface, this commit syncs the obmm default parameter values to /sys/module/obmm/parameters.
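For reference, a minimal sketch of the pattern applied here (not part of the patch; all demo names are illustrative): point a charp module parameter at a writable static buffer, so a default computed at init time becomes visible under /sys/module/obmm/parameters:

#include <linux/module.h>
#include <linux/moduleparam.h>

#define DEMO_PARAM_LEN 20

/* the default lives in a writable buffer, not a string literal */
static char demo_granu_buf[DEMO_PARAM_LEN];
static char *demo_granu;
module_param(demo_granu, charp, 0440);
MODULE_PARM_DESC(demo_granu, "Allocation granularity (illustrative)");

static void demo_sync_granu_default(unsigned long bytes)
{
        if (demo_granu)         /* user supplied a value; nothing to sync */
                return;
        /* compute the effective default and make sysfs reflect it */
        snprintf(demo_granu_buf, sizeof(demo_granu_buf), "%luM", bytes >> 20);
        demo_granu = demo_granu_buf;
}

Because param_get_charp() dereferences the pointer at read time, reassigning it after module init is enough for the sysfs file to show the effective value.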
Signed-off-by: yuhao_zhang Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/ubmempool_allocator.c | 29 +++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/ub/obmm/ubmempool_allocator.c b/drivers/ub/obmm/ubmempool_allocator.c index 1be5a5534de1..f687dec91ca7 100644 --- a/drivers/ub/obmm/ubmempool_allocator.c +++ b/drivers/ub/obmm/ubmempool_allocator.c @@ -22,8 +22,9 @@ #include "obmm_cache.h" #include "ubmempool_allocator.h" -#define DEFAULT_MEMPOOL_SIZE "1G" -static char *mempool_size = DEFAULT_MEMPOOL_SIZE; +#define MAX_DEFAULT_PARAM_LENGTH 20 +static char default_mempool_size[MAX_DEFAULT_PARAM_LENGTH] = "1G"; +static char *mempool_size = default_mempool_size; module_param(mempool_size, charp, 0440); MODULE_PARM_DESC(mempool_size, "Max aviliable cached memory total."); @@ -421,7 +422,8 @@ static void mem_allocator_uninit_one(int nid) timer_shutdown_sync(&mem_allocators[nid].refill_timer); } -static char *mempool_allocator; +static char default_mempool_allocator[MAX_DEFAULT_PARAM_LENGTH] = "buddy_highmem"; +static char *mempool_allocator = default_mempool_allocator; module_param(mempool_allocator, charp, 0440); MODULE_PARM_DESC(mempool_allocator, "OBMM mempool allocator. If not set, use buddy_highmem allocator."); @@ -451,11 +453,6 @@ static int select_mem_allocator(void) { int i; - if (!mempool_allocator) { - pr_info("no allocator specified. using buddy_highmem.\n"); - return ALLOCATOR_BUDDY_HIGHMEM; - } - for (i = 0; i < ALLOCATOR_MAX; i++) { if (!strcmp(allocator_names[i], mempool_allocator)) break; @@ -472,8 +469,21 @@ static int select_mem_allocator(void) return i; } +static void print_granu(char s[MAX_DEFAULT_PARAM_LENGTH], size_t granu) +{ + static const char * const units[] = {"", "K", "M", "G", "T"}; + int i = 0; + + while (granu >= 1024 && i < ARRAY_SIZE(units) - 1) { + granu >>= 10; + i++; + } + snprintf(s, MAX_DEFAULT_PARAM_LENGTH, "%lu%s", granu, units[i]); +} + static int init_mem_allocator_granu(enum allocator_id aid) { + static char def_granu[MAX_DEFAULT_PARAM_LENGTH]; char *p = mem_allocator_granu; if (!mem_allocator_granu) { @@ -481,6 +491,9 @@ static int init_mem_allocator_granu(enum allocator_id aid) __obmm_memseg_size = PUD_SIZE; else __obmm_memseg_size = PMD_SIZE; + + print_granu(def_granu, __obmm_memseg_size); + mem_allocator_granu = def_granu; return 0; } -- Gitee From 0d59f2924c8914560651b4966f87ce19d82be4ec Mon Sep 17 00:00:00 2001 From: shixuantong Date: Thu, 18 Dec 2025 17:31:13 +0800 Subject: [PATCH 35/48] add check ack msg and do sleep commit 8c00d38e40a6d63dea74afe8ed39eed81b1643a8 openEuler Add a check for the ack message and do the sleep; otherwise, the blocking wait ends prematurely.
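For reference, a minimal sketch of the intended control flow (not part of the patch; the demo_* context and helpers are hypothetical stand-ins for the sentry code): early-exit paths now funnel through one label, so every iteration both checks the ack and sleeps:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

struct demo_ctx { bool use_uvb; };

/* hypothetical helpers standing in for the real sentry functions */
bool demo_read_ack(struct demo_ctx *ctx);
int demo_poll_msgs(struct demo_ctx *ctx);
void demo_handle_msgs(struct demo_ctx *ctx, int n);

static int demo_wait_for_ack(struct demo_ctx *ctx, unsigned int retries)
{
        bool ack_done = false;
        int n;

        while (retries-- > 0) {
                ack_done = demo_read_ack(ctx);
                if (ctx->use_uvb)
                        goto check_ack_and_sleep;       /* was: break */
                n = demo_poll_msgs(ctx);
                if (n <= 0)
                        goto check_ack_and_sleep;       /* was: continue, which skipped the sleep */
                demo_handle_msgs(ctx, n);
check_ack_and_sleep:
                if (ack_done)
                        return 0;       /* ack received: stop blocking early */
                msleep(100);            /* keep the blocking window alive */
        }
        return -ETIMEDOUT;
}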
Signed-off-by: shixuantong Signed-off-by: guodashun Signed-off-by: shixuantong --- drivers/ub/sentry/sentry_remote_client.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/ub/sentry/sentry_remote_client.c b/drivers/ub/sentry/sentry_remote_client.c index e15b41d133cb..ed4ac35cc718 100644 --- a/drivers/ub/sentry/sentry_remote_client.c +++ b/drivers/ub/sentry/sentry_remote_client.c @@ -229,10 +229,7 @@ int remote_event_handler(enum sentry_msg_helper_msg_type remote_type, ack_done = get_ack_done(&sentry_remote_ctx.remote_event_ack_msg_buf, remote_ack_type, COMM_TYPE_UNKNOWN); spin_unlock(&sentry_buf_lock); - if (ack_done) { - pr_info("Receive ack message, stop blocking early\n"); - break; - } + goto check_ack_and_sleep; } /* Handle acknowledgment in panic mode */ if (uvb_send_success) { @@ -262,7 +259,7 @@ int remote_event_handler(enum sentry_msg_helper_msg_type remote_type, recv_msg_nodes = urma_recv(sentry_client_ctx.msg_str, URMA_SEND_DATA_MAX_LEN); if (recv_msg_nodes <= 0) - continue; + goto check_ack_and_sleep; pr_info("urma received %d nodes\n", recv_msg_nodes); for (int l = 0; l < recv_msg_nodes; l++) { struct sentry_msg_helper_msg msg; @@ -287,6 +284,7 @@ int remote_event_handler(enum sentry_msg_helper_msg_type remote_type, } } +check_ack_and_sleep: /* Check if acknowledgment received */ if (ack_done) { pr_info("Receive ack message, stop blocking early\n"); -- Gitee From e3e0b03a48dde62b948ac92c3a92d2d942000e33 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Sun, 14 Dec 2025 01:37:06 +0800 Subject: [PATCH 36/48] obmm: fix for PMD mapping commit 378b516f2bb5292fbc6dc4d80734a57c3cffe952 openEuler This patch adds the get_unmapped_area op to support PMD mapping when the caller does not request an aligned address. Fixes: 49ddfaab9aa3 ("obmm: Add mmap support for shared memory regions") Signed-off-by: yuhao_zhang Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/obmm_shm_dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c index 05de51ee167b..214031829f74 100644 --- a/drivers/ub/obmm/obmm_shm_dev.c +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -321,6 +321,11 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) pr_debug("trying hugepage mmap\n") ; mmap_granu = OBMM_MMAP_GRANU_PMD; offset &= ~OBMM_MMAP_FLAG_HUGETLB_PMD; + if (vma->vm_start % PMD_SIZE || vma->vm_end % PMD_SIZE) { + pr_err("error running huge mmap for not pmd-aligned vma: %#lx-%#lx\n", + vma->vm_start, vma->vm_end); + return -EINVAL; + } } else { mmap_granu = OBMM_MMAP_GRANU_PAGE; } @@ -857,6 +862,7 @@ static long obmm_shm_fops_ioctl(struct file *file, unsigned int cmd, unsigned lo const struct file_operations obmm_shm_fops = { .owner = THIS_MODULE, .unlocked_ioctl = obmm_shm_fops_ioctl, .mmap = obmm_shm_fops_mmap, + .get_unmapped_area = thp_get_unmapped_area, .open = obmm_shm_fops_open, .flush = obmm_shm_fops_flush, .release = obmm_shm_fops_release }; -- Gitee From d96d0dd1caad96c08455faf099e3c343198fe000 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Sun, 14 Dec 2025 01:38:24 +0800 Subject: [PATCH 37/48] obmm: display mmapped region's PageSize commit 9bf7547cefea50d206e2abbdc6cae66a18407006 openEuler This patch adds the pagesize op to display the right PageSize in /proc/pid/smaps.
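For context, /proc/<pid>/smaps derives KernelPageSize from vma_kernel_pagesize(), which calls vma->vm_ops->pagesize() when the driver provides one and falls back to PAGE_SIZE otherwise. A minimal sketch of such an op (illustrative, not part of the patch; the VM_HUGEPAGE condition is a placeholder for driver state):

#include <linux/mm.h>

/* report PMD_SIZE for huge mappings so smaps shows the real granularity */
static unsigned long demo_vma_pagesize(struct vm_area_struct *vma)
{
        if (vma->vm_flags & VM_HUGEPAGE)        /* illustrative condition */
                return PMD_SIZE;
        return PAGE_SIZE;
}

static const struct vm_operations_struct demo_vm_ops = {
        .pagesize = demo_vma_pagesize,
};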
Fixes: 49ddfaab9aa3 ("obmm: Add mmap support for shared memory regions") Signed-off-by: yuhao_zhang Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/obmm_shm_dev.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c index 214031829f74..0272287fea64 100644 --- a/drivers/ub/obmm/obmm_shm_dev.c +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -155,6 +155,17 @@ static const char *obmm_vma_name(struct vm_area_struct *vma __always_unused) return "OBMM_SHM"; } +static unsigned long obmm_pagesize(struct vm_area_struct *vma) +{ + struct file *filp = vma->vm_file; + struct obmm_region *reg = (struct obmm_region *)filp->private_data; + + if (reg->mmap_granu == OBMM_MMAP_GRANU_PMD) + return PMD_SIZE; + else + return PAGE_SIZE; +} + static const struct vm_operations_struct obmm_vm_ops = { .open = obmm_vma_open, .close = obmm_vma_close, @@ -164,6 +175,7 @@ static const struct vm_operations_struct obmm_vm_ops = { .fault = obmm_vma_fault, .access = obmm_vma_access, .name = obmm_vma_name, + .pagesize = obmm_pagesize, }; static int obmm_shm_fops_open(struct inode *inode, struct file *file) -- Gitee From 099c688025d940b9a5a8ea1ffffe28a361af4e04 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Mon, 15 Dec 2025 00:32:58 +0800 Subject: [PATCH 38/48] obmm: Fix race condition between unexport and addr_query_by_pa commit b4c1dac0f739d5cfc9e145cb9640a18e12e3e531 openEuler When obmm_query_by_pa was iterating through regions, it could access a region that was being unexported and was thus in an inconsistent state. This could cause the sgt structure to be accessed after it had been freed. Fix this by trying to take the region refcount first. Fixes: 64c6ae6a06e8 ("obmm: Add memory region export functionality") Signed-off-by: Li Ruilin Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/obmm_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ub/obmm/obmm_core.c b/drivers/ub/obmm/obmm_core.c index c01ac152e5ad..a0a681d947bc 100644 --- a/drivers/ub/obmm/obmm_core.c +++ b/drivers/ub/obmm/obmm_core.c @@ -178,6 +178,8 @@ int obmm_query_by_pa(unsigned long pa, struct obmm_ext_addr *ext_addr) spin_lock_irqsave(lock, flags); list_for_each_entry(region, &g_obmm_ctx_info.regions, node) { + if (!try_get_obmm_region(region)) + continue; if (region->type == OBMM_IMPORT_REGION) { struct obmm_import_region *i_reg; @@ -190,7 +192,7 @@ int obmm_query_by_pa(unsigned long pa, struct obmm_ext_addr *ext_addr) e_reg = container_of(region, struct obmm_export_region, region); ret = get_pa_detail_export_region(e_reg, pa, ext_addr); } - + put_obmm_region(region); if (ret == 0) break; } -- Gitee From 36b50fdfd87d5492f2053d217ff0148f7a62eb8b Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Mon, 15 Dec 2025 12:19:31 +0800 Subject: [PATCH 39/48] obmm: Remove log prints of physical address and kernelspace virtual address commit e777551b49fb8d8dbad14dd3a44c2f9b5c8fa6d4 openEuler For security reasons, remove all redundant log prints of physical address information and kernelspace virtual address information. For physical address information, only keep the prints at the entry and exit points of user operations. Address information in error-path prints is also removed. When an error occurs, we can still locate it with the information printed at the operation entry point. All kernelspace virtual address prints have been completely removed. Userspace virtual address prints are kept.
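For reference, the rule being applied, as a small sketch (not part of the patch; the demo function is illustrative): plain %p is hashed by printk, but %pa and %px print raw addresses, so error paths keep only non-address context such as error codes:

#include <linux/err.h>
#include <linux/printk.h>

static void demo_report_failure(int ret)
{
        /* good: error code only, no address material */
        pr_err("failed to occupy PA range: ret=%pe\n", ERR_PTR(ret));
        /*
         * avoided: "%pa" (and "%px") emit the raw address; unlike plain
         * "%p" they are not hashed by printk:
         * pr_err("failed to occupy PA range [%pa, %pa]\n", &start, &end);
         */
}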
Fixes: ac78ffc6e0c3 ("obmm: Add resource management support for imported memory") Signed-off-by: Li Ruilin Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/conti_mem_allocator.c | 3 -- drivers/ub/obmm/obmm_addr_check.c | 23 +++---------- drivers/ub/obmm/obmm_cache.c | 8 +---- drivers/ub/obmm/obmm_export_from_user.c | 2 +- drivers/ub/obmm/obmm_export_region_ops.c | 11 +++---- drivers/ub/obmm/obmm_import.c | 26 +++++++-------- drivers/ub/obmm/obmm_preimport.c | 33 ++++++++++--------- drivers/ub/obmm/obmm_preimport_prefilled.c | 38 +++++++--------------- drivers/ub/obmm/ubmempool_allocator.c | 21 +++--------- 9 files changed, 58 insertions(+), 107 deletions(-) diff --git a/drivers/ub/obmm/conti_mem_allocator.c b/drivers/ub/obmm/conti_mem_allocator.c index 3ed9445daca6..8ca482f5dfa5 100644 --- a/drivers/ub/obmm/conti_mem_allocator.c +++ b/drivers/ub/obmm/conti_mem_allocator.c @@ -373,11 +373,8 @@ static int conti_clear_thread(void *p) list_del(&node->list); allocator->memseg_clearing = node; - pr_debug("clearing: %d: %pa + 0x%lx\n", allocator->nid, &node->addr, node->size); spin_unlock_irqrestore(&allocator->lock, flags); ret = conti_clear_memseg(allocator, node); - pr_debug("%s: nid=%d, clear done node=%p, addr=%pa\n", __func__, allocator->nid, - node, &node->addr); spin_lock_irqsave(&allocator->lock, flags); allocator->memseg_clearing = NULL; diff --git a/drivers/ub/obmm/obmm_addr_check.c b/drivers/ub/obmm/obmm_addr_check.c index 09085d008ca8..4619dd9e6c6b 100644 --- a/drivers/ub/obmm/obmm_addr_check.c +++ b/drivers/ub/obmm/obmm_addr_check.c @@ -21,12 +21,7 @@ static struct pa_checker g_pa_checker; static bool is_same_pa_range(const struct obmm_pa_range *l, const struct obmm_pa_range *r) { - bool same = l->start == r->start && l->end == r->end; - - if (!same) - pr_err("unmatched pa range: [%pa, %pa] vs. [%pa, %pa]\n", &l->start, &l->end, - &r->start, &r->end); - return same; + return l->start == r->start && l->end == r->end; } int occupy_pa_range(const struct obmm_pa_range *pa_range) @@ -46,14 +41,9 @@ int occupy_pa_range(const struct obmm_pa_range *pa_range) if (ret != 0) { kfree(persist_info); - pr_err("failed to occupy PA range [%pa, %pa]: ret=%pe\n", &pa_range->start, - &pa_range->end, ERR_PTR(ret)); + pr_err("failed to occupy PA range: ret=%pe\n", ERR_PTR(ret)); return ret; } - pr_debug("pa_check: add [%pa,%pa]->{user=%s,data=%p}\n", &pa_range->start, &pa_range->end, - pa_range->info.user == OBMM_ADDR_USER_DIRECT_IMPORT ? - "direct_import" : "preimport", - pa_range->info.data); return 0; } @@ -68,19 +58,17 @@ int free_pa_range(const struct obmm_pa_range *pa_range) entry = mtree_erase(&g_pa_checker.pa_ranges, (unsigned long)pa_range->start); spin_unlock_irqrestore(&g_pa_checker.lock, flags); if (!entry) { - pr_err("PA range [%pa, %pa], not found.\n", &pa_range->start, &pa_range->end); + pr_err("PA range to be freed not found.\n"); return -EFAULT; } ret = 0; if (!is_same_pa_range((const struct obmm_pa_range *)entry, pa_range)) { /* expected to be UNREACHABLE */ - pr_err("BUG: PA range does not fully match.\n"); + pr_err("BUG: PA range to be freed does not fully match.\n"); ret = -ENOTRECOVERABLE; } user = ((struct obmm_pa_range *)entry)->info.user == OBMM_ADDR_USER_DIRECT_IMPORT ? 
"import" : "preimport"; - pr_debug("pa_check: del [%pa,?]->{user=%s,data=%p}\n", &pa_range->start, user, - ((struct obmm_pa_range *)entry)->info.data); kfree(entry); return ret; } @@ -126,9 +114,6 @@ int update_pa_range(phys_addr_t addr, const struct obmm_addr_info *info) if (!retrieved) return -EFAULT; - pr_debug("pa_check: update [%pa,?]->{user=%s,data=%p}\n", &addr, - info->user == OBMM_ADDR_USER_DIRECT_IMPORT ? "direct_import" : "preimport", - info->data); return 0; } diff --git a/drivers/ub/obmm/obmm_cache.c b/drivers/ub/obmm/obmm_cache.c index 534be7788501..1909da83de4c 100644 --- a/drivers/ub/obmm/obmm_cache.c +++ b/drivers/ub/obmm/obmm_cache.c @@ -71,8 +71,7 @@ int flush_cache_by_pa(phys_addr_t addr, size_t size, unsigned long cache_ops) enum hisi_soc_cache_maint_type maint_type = hisi_maint_type[cache_ops]; if (skip_cache_maintain) { - pr_debug_ratelimited("cache maintenance request {addr=%pa, size=%#zx, cache_ops=%lu}.\n", - &addr, size, cache_ops); + pr_debug_ratelimited("cache maintenance request {cache_ops=%lu}.\n", cache_ops); return 0; } @@ -225,10 +224,5 @@ int modify_pgtable_prot(struct mm_struct *mm, void *va, size_t size, bool cachea mmap_read_unlock(mm); obmm_flush_tlb(mm); - pr_debug("scan [%p-%#lx]\n", va, (uintptr_t)va + size); - pr_debug("\tpmd: %d\n", info.pmd_cnt); - pr_debug("\tpmd leaf: %d\n", info.pmd_leaf_cnt); - pr_debug("\tpte: %d\n", info.pte_cnt); - pr_debug("\thugetlb: %d\n", info.hugetlb_cnt); return 0; } diff --git a/drivers/ub/obmm/obmm_export_from_user.c b/drivers/ub/obmm/obmm_export_from_user.c index e1cd35416e63..bd0663bb197f 100644 --- a/drivers/ub/obmm/obmm_export_from_user.c +++ b/drivers/ub/obmm/obmm_export_from_user.c @@ -100,7 +100,7 @@ static bool hisi_workarounds_check_page_list(struct obmm_export_region *reg, str nid = 0; #endif if (nid < 0 || nid >= OBMM_MAX_LOCAL_NUMA_NODES) { - pr_err("Invalid node ID %d for page %p\n", nid, p); + pr_err("Invalid node ID %d.\n", nid); return false; } diff --git a/drivers/ub/obmm/obmm_export_region_ops.c b/drivers/ub/obmm/obmm_export_region_ops.c index 3b9cdd6b7dac..b561224b660b 100644 --- a/drivers/ub/obmm/obmm_export_region_ops.c +++ b/drivers/ub/obmm/obmm_export_region_ops.c @@ -129,15 +129,14 @@ static int kernel_pgtable_invalid_call(phys_addr_t start, phys_addr_t end, unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long end_pfn = (end + 1) >> PAGE_SHIFT; - pr_debug("call external: set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_inval=%d)\n", - start_pfn, end_pfn, info->set_inval); + pr_debug("call external: set_linear_mapping_invalid(set_inval=%d)\n", info->set_inval); ret = set_linear_mapping_invalid(start_pfn, end_pfn, info->set_inval); if (ret < 0) { - pr_err("error calling set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_inval=%d): ret=%pe\n", - start_pfn, end_pfn, info->set_inval, ERR_PTR(ret)); + pr_err("error calling set_linear_mapping_invalid(set_inval=%d): ret=%pe\n", + info->set_inval, ERR_PTR(ret)); } else { - pr_debug("external called: set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_inval=%d, ret=%pe)\n", - start_pfn, end_pfn, info->set_inval, ERR_PTR(ret)); + pr_debug("external called: set_linear_mapping_invalid(set_inval=%d, ret=%pe)\n", + info->set_inval, ERR_PTR(ret)); } info->ret = ret; diff --git a/drivers/ub/obmm/obmm_import.c b/drivers/ub/obmm/obmm_import.c index 2875fa9e58ec..18f59efa90c6 100644 --- a/drivers/ub/obmm/obmm_import.c +++ b/drivers/ub/obmm/obmm_import.c @@ -38,8 +38,7 @@ static unsigned long get_pa_range_mem_cap(u32 scna, 
phys_addr_t pa, size_t size) if (ub_memory_validate_pa(scna, pa_start, pa_end, false)) mem_cap |= OBMM_MEM_ALLOW_NONCACHEABLE_MMAP; if (mem_cap == 0) - pr_err("PA range invalid. Non-UBMEM memory cannot be mmaped as import memory: pa=%pa, size=%#zx\n", - &pa_start, size); + pr_err("PA range invalid. Non-UBMEM memory cannot be mmaped as import memory\n"); return mem_cap; } @@ -60,8 +59,7 @@ static int setup_pa(struct obmm_import_region *i_reg) ubmem_res = setup_ubmem_resource(i_reg->pa, i_reg->region.mem_size, false); if (IS_ERR(ubmem_res)) { - pr_err("failed to setup ubmem resource. pa=%pa, size=%#llx, ret=%pe\n", - &i_reg->pa, i_reg->region.mem_size, ubmem_res); + pr_err("failed to setup ubmem resource: ret=%pe\n", ubmem_res); return PTR_ERR(ubmem_res); } i_reg->ubmem_res = ubmem_res; @@ -99,17 +97,20 @@ static int teardown_remote_numa(struct obmm_import_region *i_reg, bool force) { int ret, this_ret; - pr_info("call external: remove_memory_remote(nid=%d, pa=%#llx, size=%#llx)\n", - i_reg->numa_id, i_reg->pa, i_reg->region.mem_size); + pr_info("call external: remove_memory_remote(nid=%d, size=%#llx)\n", + i_reg->numa_id, i_reg->region.mem_size); ret = remove_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size); pr_debug("external called: remove_memory_remote, ret=%pe\n", ERR_PTR(ret)); /* a full rollback is still possible: check whether this is a full teardown */ - if (ret != 0 && !force) + if (ret != 0 && !force) { + pr_err("remove_memory_remote(nid=%d, size=%#llx) failed: ret=%pe.\n", + i_reg->numa_id, i_reg->region.mem_size, ERR_PTR(ret)); return ret; + } if (region_preimport(&i_reg->region)) { - pr_info("call external: add_memory_remote(nid=%d, start=0x%llx, size=0x%llx, flags=MEMORY_KEEP_ISOLATED)\n", - i_reg->numa_id, i_reg->pa, i_reg->region.mem_size); + pr_info("call external: add_memory_remote(nid=%d, size=0x%llx, flags=MEMORY_KEEP_ISOLATED)\n", + i_reg->numa_id, i_reg->region.mem_size); this_ret = add_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, MEMORY_KEEP_ISOLATED); pr_debug("external called: add_memory_remote() returned %d\n", this_ret); @@ -132,13 +133,12 @@ static int setup_remote_numa(struct obmm_import_region *i_reg) flags = MEMORY_DIRECT_ONLINE; if (!(i_reg->region.mem_cap & OBMM_MEM_ALLOW_CACHEABLE_MMAP)) { - pr_err("PA range invalid. Cacheable memory cannot be managed with numa.remote: pa=%pa, size=%#llx\n", - &i_reg->pa, i_reg->region.mem_size); + pr_err("PA range invalid. Cacheable memory cannot be managed with numa.remote\n"); return -EINVAL; } - pr_info("call external: add_memory_remote(nid=%d, start=0x%llx, size=0x%llx, flags=%d)\n", - i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, flags); + pr_info("call external: add_memory_remote(nid=%d, flags=%d)\n", + i_reg->numa_id, flags); ret = add_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size, flags); pr_debug("external called: add_memory_remote() returned %d\n", ret); if (ret < 0) { diff --git a/drivers/ub/obmm/obmm_preimport.c b/drivers/ub/obmm/obmm_preimport.c index 96daafbadcec..aac211232a88 100644 --- a/drivers/ub/obmm/obmm_preimport.c +++ b/drivers/ub/obmm/obmm_preimport.c @@ -69,17 +69,17 @@ int check_preimport_cmd_common(const struct obmm_cmd_preimport *cmd) * to check for OBMM_BASIC_GRANU here. 
*/ if (cmd->length % memory_block_size_bytes() != 0) { - pr_err("preimport length not aligned to %#lx: %#llx + %#llx.\n", - memory_block_size_bytes(), cmd->pa, cmd->length); + pr_err("preimport length not aligned to %#lx.\n", + memory_block_size_bytes()); return -EINVAL; } if (cmd->pa % memory_block_size_bytes()) { - pr_err("preimport base PA not aligned to %#lx: %#llx + %#llx.\n", - memory_block_size_bytes(), cmd->pa, cmd->length); + pr_err("preimport base PA not aligned to %#lx.\n", + memory_block_size_bytes()); return -EINVAL; } if (cmd->length > ULLONG_MAX - cmd->pa) { - pr_err("preimport PA range overflowed: %#llx + %#llx.\n", cmd->pa, cmd->length); + pr_err("preimport PA range overflowed.\n"); return -EINVAL; } if (cmd->length == 0) { @@ -103,18 +103,20 @@ int preimport_prepare_common(struct preimport_range *pr, uint8_t base_dist) int ret, ret_err; if (!ub_memory_validate_pa(pr->scna, pr->start, pr->end, true)) { - pr_err("PA range invalid. Cacheable memory cannot be managed with preimport: pa=%pa, size=%#llx\n", - &pr->start, pr->end - pr->start + 1); + pr_err("PA range invalid. Cacheable memory cannot be managed with preimport\n"); return -EINVAL; } - pr_info("call external: add_memory_remote(nid=%d, start=%pa, size=%#llx, flags=MEMORY_KEEP_ISOLATED)\n", - pr->numa_id, &pr->start, pr->end - pr->start + 1); + pr_info("call external: add_memory_remote(nid=%d, flags=MEMORY_KEEP_ISOLATED)\n", + pr->numa_id); ret = add_memory_remote(pr->numa_id, pr->start, pr->end - pr->start + 1, MEMORY_KEEP_ISOLATED); pr_debug("external called: add_memory_remote() returned %d\n", ret); - if (ret < 0) + if (ret < 0) { + pr_err("failed to call add_memory_remote(nid=%d): %pe\n", + pr->numa_id, ERR_PTR(ret)); return -EPERM; + } WARN_ON(pr->numa_id != NUMA_NO_NODE && pr->numa_id != ret); pr->numa_id = ret; @@ -131,8 +133,7 @@ int preimport_prepare_common(struct preimport_range *pr, uint8_t base_dist) return 0; err_remove_memory_remote: - pr_info("call external: remove_memory_remote(nid=%d, start=%pa, size=%#llx)\n", pr->numa_id, - &pr->start, pr->end - pr->start + 1); + pr_info("call external: remove_memory_remote(nid=%d)\n", pr->numa_id); ret_err = remove_memory_remote(pr->numa_id, pr->start, pr->end - pr->start + 1); pr_debug("external called: remove_memory_remote() returned %d\n", ret_err); return ret; @@ -142,12 +143,14 @@ int preimport_release_common(struct preimport_range *pr, bool force) { int ret; - pr_info("call external: remove_memory_remote(nid=%d, start=%pa, size=%#llx)\n", pr->numa_id, - &pr->start, pr->end - pr->start + 1); + pr_info("call external: remove_memory_remote(nid=%d)\n", pr->numa_id); ret = remove_memory_remote(pr->numa_id, pr->start, pr->end - pr->start + 1); pr_debug("external called: remove_memory_remote() returned %pe\n", ERR_PTR(ret)); - if (ret && !force) + if (ret && !force) { + pr_err("failed to call remove_memory_remote(nid=%d, size=%#llx): ret=%pe.\n", + pr->numa_id, pr->end - pr->start + 1, ERR_PTR(ret)); return ret; + } mutex_lock(&list_mutex); list_del(&pr->node); diff --git a/drivers/ub/obmm/obmm_preimport_prefilled.c b/drivers/ub/obmm/obmm_preimport_prefilled.c index f06df16892bf..50a4273d0924 100644 --- a/drivers/ub/obmm/obmm_preimport_prefilled.c +++ b/drivers/ub/obmm/obmm_preimport_prefilled.c @@ -49,8 +49,7 @@ static int create_prefilled_preimport_range(const struct obmm_cmd_preimport *cmd ppr->ubmem_res = setup_ubmem_resource(cmd->pa, cmd->length, true); if (IS_ERR(ppr->ubmem_res)) { - pr_err("failed to setup ubmem resource on preimport. 
pa=%pa, size=%#llx, ret=%pe\n", - &cmd->pa, cmd->length, ppr->ubmem_res); + pr_err("failed to setup ubmem resource on preimport: ret=%pe\n", ppr->ubmem_res); kfree(ppr->bitmap); kfree(ppr); return PTR_ERR(ppr->ubmem_res); @@ -74,11 +73,11 @@ static int get_pa_mapping(phys_addr_t addr, struct prefilled_preimport_range **p ret = query_pa_range(addr, &info); if (ret) { - pr_err("No information found with PA=%pa.\n", &addr); + pr_err("No information found with PA requested.\n"); return ret; } if (info.user != OBMM_ADDR_USER_PREIMPORT) { - pr_err("PA=%pa is not a preimport address.\n", &addr); + pr_err("PA requested is not a preimport address.\n"); return -EINVAL; } if (info.data == not_ready_ptr) { @@ -87,7 +86,6 @@ static int get_pa_mapping(phys_addr_t addr, struct prefilled_preimport_range **p } *p_ppr = (struct prefilled_preimport_range *)info.data; - pr_debug("prefilled preimport range found with PA %pa.\n", &addr); return 0; } @@ -166,22 +164,18 @@ int preimport_release_prefilled(phys_addr_t start, phys_addr_t end) } /* must be an exact match */ if (ppr->pr.start != start || ppr->pr.end != end) { - pr_err("requested range touches ppr<%pa> but is not an exact match.\n", - &ppr->pr.start); + pr_err("requested range touches ppr but is not an exact match.\n"); ret = -EINVAL; goto err_unlock; } if (ppr->pr.use_count != 0) { - pr_err("ppr<%pa> cannot be released: %u active users found.\n", &ppr->pr.start, - ppr->pr.use_count); + pr_err("preimport cannot be released: %u active users found.\n", ppr->pr.use_count); ret = -EBUSY; goto err_unlock; } ret = preimport_release_common(&ppr->pr, false); - if (ret) { - pr_err("failed to release ppr<%pa>.\n", &ppr->pr.start); + if (ret) goto err_unlock; - } /* roll back is not possible from this point */ pa_range.start = ppr->pr.start; @@ -193,7 +187,6 @@ int preimport_release_prefilled(phys_addr_t start, phys_addr_t end) mutex_unlock(&preimport_mutex); destroy_prefilled_preimport_range(ppr); - pr_debug("ppr<%pa> released.\n", &start); return ret; err_unlock: @@ -211,11 +204,10 @@ static int get_ppr(phys_addr_t pa, struct prefilled_preimport_range **p_ppr) if (ret) goto out_unlock; if (ppr == not_ready_ptr) { - pr_err("ppr <%pa> not ready yet.\n", &pa); + pr_err("preimport requested not ready yet.\n"); ret = -EAGAIN; goto out_unlock; } - pr_debug("ppr <%pa> refcount: %u -> %u.\n", &pa, ppr->pr.use_count, ppr->pr.use_count + 1); ppr->pr.use_count += 1; *p_ppr = ppr; out_unlock: @@ -227,8 +219,6 @@ static void put_ppr(struct prefilled_preimport_range *ppr) { mutex_lock(&preimport_mutex); WARN_ON(ppr->pr.use_count == 0); - pr_debug("ppr <%pa> refcount: %u -> %u.\n", &ppr->pr.start, ppr->pr.use_count, - ppr->pr.use_count - 1); ppr->pr.use_count -= 1; mutex_unlock(&preimport_mutex); } @@ -241,8 +231,7 @@ static int occupy_ppr_blocks(struct prefilled_preimport_range *ppr, phys_addr_t spin_lock_irqsave(&ppr->bitmap_lock, flags); if (start < ppr->pr.start || end > ppr->pr.end) { - pr_err("requested range [%pa, %pa] is not managed by ppr [%pa, %pa].\n", &start, - &end, &ppr->pr.start, &ppr->pr.end); + pr_err("requested range is not managed by preimport.\n"); ret = -EINVAL; goto out_unlock; } @@ -252,15 +241,13 @@ static int occupy_ppr_blocks(struct prefilled_preimport_range *ppr, phys_addr_t for (bit = init_bit; bit <= end_bit; bit++) { if (test_bit(bit, ppr->bitmap)) { ret = -EEXIST; - pr_err("conflicts on preimport block %lu of ppr<%pa>.\n", bit, - &ppr->pr.start); + pr_err("requested range conflicts on preimport block %lu.\n", bit); goto out_unlock; } } for (bit 
= init_bit; bit <= end_bit; bit++) set_bit(bit, ppr->bitmap); - pr_debug("ppr<%pa>: bitmap[%lu, %lu] set.\n", &ppr->pr.start, init_bit, end_bit); out_unlock: spin_unlock_irqrestore(&ppr->bitmap_lock, flags); @@ -275,8 +262,7 @@ static int free_ppr_blocks(struct prefilled_preimport_range *ppr, phys_addr_t st spin_lock_irqsave(&ppr->bitmap_lock, flags); if (start < ppr->pr.start || end > ppr->pr.end) { - pr_err("requested range [%pa, %pa] is not managed by ppr [%pa, %pa].\n", &start, - &end, &ppr->pr.start, &ppr->pr.end); + pr_err("requested range is not managed by preimport.\n"); ret = -EINVAL; goto out_unlock; } @@ -286,15 +272,13 @@ static int free_ppr_blocks(struct prefilled_preimport_range *ppr, phys_addr_t st for (bit = init_bit; bit <= end_bit; bit++) { if (!test_bit(bit, ppr->bitmap)) { ret = -EINVAL; - pr_err("preimport block %lu of ppr<%pa> never used.\n", bit, - &ppr->pr.start); + pr_err("preimport block %lu never used.\n", bit); goto out_unlock; } } for (bit = init_bit; bit <= end_bit; bit++) clear_bit(bit, ppr->bitmap); - pr_debug("ppr<%pa>: bitmap[%lu, %lu] cleared.\n", &ppr->pr.start, init_bit, end_bit); out_unlock: spin_unlock_irqrestore(&ppr->bitmap_lock, flags); diff --git a/drivers/ub/obmm/ubmempool_allocator.c b/drivers/ub/obmm/ubmempool_allocator.c index f687dec91ca7..da6094288f7b 100644 --- a/drivers/ub/obmm/ubmempool_allocator.c +++ b/drivers/ub/obmm/ubmempool_allocator.c @@ -201,16 +201,15 @@ static int set_memseg_linear_mapping_invalid(struct memseg_node *node, bool set_ start_pfn = PHYS_PFN(node->addr); end_pfn = PHYS_PFN(node->addr + node->size); - pr_debug("call external: set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_nc=%d)\n", - start_pfn, end_pfn, set_nc); + pr_debug("call external: set_linear_mapping_invalid(set_nc=%d)\n", set_nc); ret = set_linear_mapping_invalid(start_pfn, end_pfn, set_nc); if (ret) { - pr_err("failed to update kernel linear mapping cacheability for segment %#llx+%#lx, error=%pe.\n", - node->addr, node->size, ERR_PTR(ret)); + pr_err("failed to update kernel linear mapping cacheability: error=%pe.\n", + ERR_PTR(ret)); return ret; } - pr_debug("external called: set_linear_mapping_invalid(start_pfn=%#lx, end_pfn=%#lx, set_nc=%d, ret=%pe)\n", - start_pfn, end_pfn, set_nc, ERR_PTR(ret)); + pr_debug("external called: set_linear_mapping_invalid(set_nc=%d, ret=%pe)\n", + set_nc, ERR_PTR(ret)); return 0; } @@ -238,8 +237,6 @@ static struct memseg_node *hugetlb_pmd_alloc_memseg(struct conti_mem_allocator * goto out_free_seg; } - pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); - ret = set_memseg_linear_mapping_invalid(node, true); if (unlikely(ret)) goto out_free_seg; @@ -266,8 +263,6 @@ static void hugetlb_free_memseg(struct conti_mem_allocator *a __always_unused, return; } - pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); - folio = pfn_folio(node->addr >> PAGE_SHIFT); set_memseg_linear_mapping_invalid(node, false); @@ -303,8 +298,6 @@ static struct memseg_node *hugetlb_pud_alloc_memseg(struct conti_mem_allocator * goto out_free_seg; } - pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); - ret = set_memseg_linear_mapping_invalid(node, true); if (unlikely(ret)) goto out_free_seg; @@ -331,8 +324,6 @@ static void buddy_free_memseg(struct conti_mem_allocator *a __always_unused, return; } - pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); - folio = pfn_folio(node->addr >> PAGE_SHIFT); set_memseg_linear_mapping_invalid(node, false); @@ -369,8 +360,6 @@ static struct 
memseg_node *buddy_alloc_memseg(struct conti_mem_allocator *a) goto out_free_seg; } - pr_debug("%s: node %pa+%#lx\n", __func__, &node->addr, node->size); - ret = set_memseg_linear_mapping_invalid(node, true); if (unlikely(ret)) goto out_free_seg; -- Gitee From a4c72c4392bb90c050909b74cf5f7538e63494b6 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Wed, 17 Dec 2025 17:30:49 +0800 Subject: [PATCH 40/48] obmm: Fix race condition of region release and device release commit 0923efc7df209ddc774e3dedb51461ca57d8d91e openEuler Due to the Linux inode cache mechanism, the release callback of the device embedded in an OBMM region might execute after the region itself has been freed with kfree(region) . This creates a potential use-after-free vulnerability, as the callback would be operating on already deallocated memory. To address this, we need to implement a proper synchronization mechanism to ensure the region memory is only freed after the device release callback has completed execution. Fixes: b100600e5c18 ("obmm: Add shared memory device interface") Signed-off-by: Li Ruilin Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/obmm_core.h | 1 + drivers/ub/obmm/obmm_export.c | 2 ++ drivers/ub/obmm/obmm_export_from_pool.c | 2 ++ drivers/ub/obmm/obmm_export_from_user.c | 2 ++ drivers/ub/obmm/obmm_import.c | 5 +++++ drivers/ub/obmm/obmm_shm_dev.c | 11 +++++++++++ drivers/ub/obmm/obmm_shm_dev.h | 2 ++ 7 files changed, 25 insertions(+) diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index 4d844334dbad..ff3e717754ef 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -87,6 +87,7 @@ struct obmm_region { struct cdev cdevice; struct device device; + atomic_t device_released; refcount_t refcnt; diff --git a/drivers/ub/obmm/obmm_export.c b/drivers/ub/obmm/obmm_export.c index e1ec90cf15dd..237eb1e122f9 100644 --- a/drivers/ub/obmm/obmm_export.c +++ b/drivers/ub/obmm/obmm_export.c @@ -25,6 +25,7 @@ #include "obmm_core.h" #include "obmm_cache.h" #include "obmm_export.h" +#include "obmm_shm_dev.h" int export_flags_to_region_flags(unsigned long *region_flags, unsigned long user_flags) { @@ -266,6 +267,7 @@ int set_export_vendor(struct obmm_export_region *e_reg, const void __user *vendo void free_export_region(struct obmm_export_region *e_reg) { + wait_until_dev_released(&e_reg->region); if (e_reg->vendor_len) kfree(e_reg->vendor_info); diff --git a/drivers/ub/obmm/obmm_export_from_pool.c b/drivers/ub/obmm/obmm_export_from_pool.c index dabf5373ea2c..daa4214955da 100644 --- a/drivers/ub/obmm/obmm_export_from_pool.c +++ b/drivers/ub/obmm/obmm_export_from_pool.c @@ -255,6 +255,8 @@ static struct obmm_export_region *alloc_region_from_cmd(struct obmm_cmd_export * if (e_reg == NULL) return ERR_PTR(-ENOMEM); + atomic_set(&e_reg->region.device_released, 1); + e_reg->region.type = OBMM_EXPORT_REGION; e_reg->region.mem_size = total_size; e_reg->region.mem_cap = OBMM_MEM_ALLOW_CACHEABLE_MMAP | OBMM_MEM_ALLOW_NONCACHEABLE_MMAP; diff --git a/drivers/ub/obmm/obmm_export_from_user.c b/drivers/ub/obmm/obmm_export_from_user.c index bd0663bb197f..94e1c85b7190 100644 --- a/drivers/ub/obmm/obmm_export_from_user.c +++ b/drivers/ub/obmm/obmm_export_from_user.c @@ -279,6 +279,8 @@ alloc_export_region_from_obmm_cmd_export_pid(const struct obmm_cmd_export_pid *e if (e_reg == NULL) return ERR_PTR(-ENOMEM); + atomic_set(&e_reg->region.device_released, 1); + e_reg->mem_desc_pid.pid = export_pid->pid; e_reg->mem_desc_pid.user_va = export_pid->va; e_reg->region.mem_size = 
export_pid->length; diff --git a/drivers/ub/obmm/obmm_import.c b/drivers/ub/obmm/obmm_import.c index 18f59efa90c6..4a56a86f68f2 100644 --- a/drivers/ub/obmm/obmm_import.c +++ b/drivers/ub/obmm/obmm_import.c @@ -16,6 +16,7 @@ #include "obmm_preimport.h" #include "obmm_resource.h" #include "obmm_addr_check.h" +#include "obmm_shm_dev.h" static void set_import_region_datapath(const struct obmm_import_region *i_reg, struct obmm_datapath *datapath) @@ -415,6 +416,8 @@ int obmm_import(struct obmm_cmd_import *cmd_import) if (i_reg == NULL) return -ENOMEM; + atomic_set(&i_reg->region.device_released, 1); + /* arguments to region (logs produced by callee) */ retval = init_import_region_from_cmd(cmd_import, i_reg); if (retval) @@ -455,6 +458,7 @@ int obmm_import(struct obmm_cmd_import *cmd_import) out_region_uninit: uninit_obmm_region(&i_reg->region); out_free_ireg: + wait_until_dev_released(&i_reg->region); kfree(i_reg); return retval; } @@ -493,6 +497,7 @@ int obmm_unimport(const struct obmm_cmd_unimport *cmd_unimport) deregister_obmm_region(reg); uninit_obmm_region(reg); + wait_until_dev_released(&i_reg->region); kfree(i_reg); pr_info("%s: mem_id=%llu completed.\n", __func__, cmd_unimport->mem_id); diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c index 0272287fea64..81b4b9d86bce 100644 --- a/drivers/ub/obmm/obmm_shm_dev.c +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -881,9 +881,18 @@ const struct file_operations obmm_shm_fops = { .owner = THIS_MODULE, static void obmm_shm_dev_release(struct device *dev) { + struct obmm_region *reg = container_of(dev, struct obmm_region, device); + + atomic_set(®->device_released, 1); module_put(THIS_MODULE); } +void wait_until_dev_released(struct obmm_region *reg) +{ + while (atomic_read(®->device_released) == 0) + cpu_relax(); +} + int obmm_shm_dev_add(struct obmm_region *reg) { int ret; @@ -918,6 +927,8 @@ int obmm_shm_dev_add(struct obmm_region *reg) goto err_put_dev; } + atomic_set(®->device_released, 0); + return 0; /* NOTE: If the device is properly initialized, the refcount of module diff --git a/drivers/ub/obmm/obmm_shm_dev.h b/drivers/ub/obmm/obmm_shm_dev.h index bfced747a2aa..e0bf3553a3ce 100644 --- a/drivers/ub/obmm/obmm_shm_dev.h +++ b/drivers/ub/obmm/obmm_shm_dev.h @@ -12,5 +12,7 @@ int obmm_shm_dev_init(void); void obmm_shm_dev_exit(void); int obmm_shm_dev_add(struct obmm_region *reg); void obmm_shm_dev_del(struct obmm_region *reg); +void wait_until_dev_released(struct obmm_region *reg); + #endif -- Gitee From 86167e065add2f75dda586bbc7df80643174f914 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Wed, 17 Dec 2025 18:17:41 +0800 Subject: [PATCH 41/48] obmm: Rollback mmap_granu when mmap failed commit 43087a8da5cc9401bc461906e47e3e28aeb312a8 openEuler During the mmap() handling, there's an oversight in the error recovery path for the mmap_granu field. When initializing a memory region, the code sets reg->mmap_granu to the requested granularity (either PAGE or PMD) but fails to revert this setting if the mapping operation encounters an error. This creates a persistent state where the region is marked as using a specific granularity, even though the actual mapping failed. This issue breaks the expected behavior where failed mapping attempts should not prevent users from retrying with different mapping parameters. Implementing proper rollback of the mmap_granu field in error paths would resolve this limitation. 
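For reference, a minimal sketch of the save/restore shape used here (not part of the patch; the demo types and the demo_do_map() helper are hypothetical): snapshot the field before mutating it and restore it on every error exit, so a failed mmap() leaves the region retryable:

#include <linux/errno.h>

enum demo_granu { DEMO_GRANU_NONE, DEMO_GRANU_PAGE, DEMO_GRANU_PMD };
struct demo_region { enum demo_granu granu; };

int demo_do_map(struct demo_region *reg);       /* hypothetical */

static int demo_mmap(struct demo_region *reg, enum demo_granu requested)
{
        enum demo_granu saved = reg->granu;     /* snapshot before mutating */
        int ret;

        if (reg->granu == DEMO_GRANU_NONE)
                reg->granu = requested;
        else if (reg->granu != requested)
                return -EPERM;                  /* nothing mutated yet */

        ret = demo_do_map(reg);
        if (ret)
                goto err_restore;
        return 0;

err_restore:
        reg->granu = saved;     /* a failed attempt leaves no persistent state */
        return ret;
}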
Fixes: 49ddfaab9aa3 ("obmm: Add mmap support for shared memory regions") Signed-off-by: Li Ruilin Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/obmm_shm_dev.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/ub/obmm/obmm_shm_dev.c b/drivers/ub/obmm/obmm_shm_dev.c index 81b4b9d86bce..0814c37d12b4 100644 --- a/drivers/ub/obmm/obmm_shm_dev.c +++ b/drivers/ub/obmm/obmm_shm_dev.c @@ -310,7 +310,7 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) unsigned long size, offset; uint8_t mem_state; enum obmm_mmap_mode old_mmap_mode; - enum obmm_mmap_granu mmap_granu; + enum obmm_mmap_granu mmap_granu, init_mmap_granu; int ret; bool cacheable, o_sync; @@ -331,21 +331,23 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) if (offset & OBMM_MMAP_FLAG_HUGETLB_PMD) { pr_debug("trying hugepage mmap\n"); - mmap_granu = OBMM_MMAP_GRANU_PMD; offset &= ~OBMM_MMAP_FLAG_HUGETLB_PMD; if (vma->vm_start % PMD_SIZE || vma->vm_end % PMD_SIZE) { pr_err("error running huge mmap for not pmd-aligned vma: %#lx-%#lx\n", vma->vm_start, vma->vm_end); return -EINVAL; } + mmap_granu = OBMM_MMAP_GRANU_PMD; } else { mmap_granu = OBMM_MMAP_GRANU_PAGE; } + init_mmap_granu = reg->mmap_granu; if (reg->mmap_granu == OBMM_MMAP_GRANU_NONE) { reg->mmap_granu = mmap_granu; } else if (reg->mmap_granu != mmap_granu) { pr_err("map with PAGE_SIZE and PMD_SIZE granu should not be mixed on the same region\n"); - return -EINVAL; + ret = -EPERM; + goto err_reset_mmap_granu; } vma->vm_pgoff = offset >> PAGE_SHIFT; @@ -353,7 +355,8 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) if (offset >= reg->mem_size || size > reg->mem_size - offset) { pr_err("mmap region %d: offset:%#lx, size:%#lx over region size: %#llx", reg->regionid, offset, size, reg->mem_size); - return -EINVAL; + ret = -EINVAL; + goto err_reset_mmap_granu; } /* @@ -404,6 +407,11 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) ret = init_ownership_info(reg); if (ret) goto err_release_local_state_info; + /* + * after ownership_info initialized, mmap_granu should not be + * reset to OBMM_MMAP_GRANU_NONE. + */ + init_mmap_granu = reg->mmap_granu; ret = check_mmap_allowed(reg, vma, mem_state); if (ret) goto err_release_local_state_info; @@ -454,6 +462,8 @@ static int obmm_shm_fops_mmap(struct file *file, struct vm_area_struct *vma) reg->mmap_mode = OBMM_MMAP_INIT; err_mutex_unlock: mutex_unlock(®->state_mutex); +err_reset_mmap_granu: + reg->mmap_granu = init_mmap_granu; return ret; } -- Gitee From a9993c4d0e10d1e5b2cbc2848685a15d170dad44 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Wed, 17 Dec 2025 18:30:43 +0800 Subject: [PATCH 42/48] obmm: Add FAST flag check for obmm_export_from_user commit a2b968884d48555aed651ebebe79e95726777bf4 openEuler The "FAST" flag does not actually take effect in the export_from_user function. Validation for this flag in the user input parameters needs to be added. 
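As a design note (not part of the patch; the flag names are illustrative), an allow-mask is an equivalent way to reject every unsupported flag at once, including flags added later, rather than one check per flag:

#include <linux/bits.h>
#include <linux/errno.h>
#include <linux/printk.h>

#define DEMO_FLAG_PERSIST       BIT(0)  /* illustrative flags */
#define DEMO_FLAG_VERIFY        BIT(1)
#define DEMO_ALLOWED_FLAGS      (DEMO_FLAG_PERSIST | DEMO_FLAG_VERIFY)

static int demo_check_flags(unsigned long flags)
{
        /* reject anything outside the explicitly supported set */
        if (flags & ~DEMO_ALLOWED_FLAGS) {
                pr_err("unsupported flags: %#lx\n", flags & ~DEMO_ALLOWED_FLAGS);
                return -EINVAL;
        }
        return 0;
}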
Fixes: 2a3becabc966 ("obmm: Add user address export support") Signed-off-by: Li Ruilin Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/obmm_export_from_user.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ub/obmm/obmm_export_from_user.c b/drivers/ub/obmm/obmm_export_from_user.c index 94e1c85b7190..f136d3398753 100644 --- a/drivers/ub/obmm/obmm_export_from_user.c +++ b/drivers/ub/obmm/obmm_export_from_user.c @@ -255,6 +255,10 @@ static int obmm_cmd_export_pid_allowed(struct obmm_cmd_export_pid *cmd) pr_err("ALLOW_MMAP flag is not allowed in export_user_addr.\n"); return -EINVAL; } + if (cmd->flags & OBMM_EXPORT_FLAG_FAST) { + pr_err("FAST flag is not allowed in export_user_addr.\n"); + return -EINVAL; + } if (cmd->length == 0) { pr_err("export sizeof 0 memory is not allowed.\n"); -- Gitee From 0df65c1afd3c189a4d11704c16322b12c03290b8 Mon Sep 17 00:00:00 2001 From: Li Ruilin Date: Wed, 24 Dec 2025 11:50:29 +0800 Subject: [PATCH 43/48] obmm: Register resource for every memdev commit c8c43a60e1da059f77d910a19668a37d05fcfa82 openEuler Commit ac78ffc6e0c3 ("obmm: Add resource management support for imported memory") registered resources to the system for each import and preimport operation. However, these resources only included descriptions of the entire address segment, not resource descriptions for each device. This patch fixes this issue by registering corresponding resources for each device. The resource tree after the fix will roughly be as follows: 50000000000-50007ffffff : DIRECT_IMPORT_UBMEM 50000000000-50007ffffff : System RAM (Remote) 50000000000-50007ffffff : MEMID_1 50018000000-5001fffffff : PREIMPORT_UBMEM 50018000000-5001fffffff : System RAM (Remote) 50018000000-5001fffffff : MEMID_2 Fixes: ac78ffc6e0c3 ("obmm: Add resource management support for imported memory") Signed-off-by: Li Ruilin Signed-off-by: Wang Xin <2913220561@qq.com> --- drivers/ub/obmm/obmm_core.h | 2 ++ drivers/ub/obmm/obmm_import.c | 63 +++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/drivers/ub/obmm/obmm_core.h b/drivers/ub/obmm/obmm_core.h index ff3e717754ef..ea6a1f6b5a82 100644 --- a/drivers/ub/obmm/obmm_core.h +++ b/drivers/ub/obmm/obmm_core.h @@ -155,6 +155,8 @@ struct obmm_import_region { /* resource of the PA range */ struct ubmem_resource *ubmem_res; + /* the resource for this region */ + struct resource *memdev_res; u64 pa; /* imported NUMA node */ diff --git a/drivers/ub/obmm/obmm_import.c b/drivers/ub/obmm/obmm_import.c index 4a56a86f68f2..8982f1f3dfab 100644 --- a/drivers/ub/obmm/obmm_import.c +++ b/drivers/ub/obmm/obmm_import.c @@ -72,7 +72,7 @@ static int setup_pa(struct obmm_import_region *i_reg) end = i_reg->pa + i_reg->region.mem_size - 1; set_import_region_datapath(i_reg, &datapath); - return preimport_commit_prefilled(start, end, &datapath, &i_reg->numa_id, + ret = preimport_commit_prefilled(start, end, &datapath, &i_reg->numa_id, &i_reg->preimport_handle); if (ret) return ret; @@ -98,6 +98,10 @@ static int teardown_remote_numa(struct obmm_import_region *i_reg, bool force) { int ret, this_ret; + ret = lock_save_memdev_descendents(i_reg->ubmem_res); + if (ret) + return ret; + pr_info("call external: remove_memory_remote(nid=%d, size=%#llx)\n", i_reg->numa_id, i_reg->region.mem_size); ret = remove_memory_remote(i_reg->numa_id, i_reg->pa, i_reg->region.mem_size); @@ -106,7 +110,7 @@ static int teardown_remote_numa(struct obmm_import_region *i_reg, bool force) if (ret != 0 && !force) { 
pr_err("remove_memory_remote(nid=%d, size=%#llx) failed: ret=%pe.\n", i_reg->numa_id, i_reg->region.mem_size, ERR_PTR(ret)); - return ret; + goto out_recover_resource; } if (region_preimport(&i_reg->region)) { @@ -121,6 +125,8 @@ static int teardown_remote_numa(struct obmm_import_region *i_reg, bool force) } } +out_recover_resource: + restore_unlock_memdev_descendents(i_reg->ubmem_res); return ret; } @@ -193,6 +199,35 @@ static int free_addr_range(const struct obmm_import_region *i_reg) return 0; } +static int setup_iomem_resource(struct obmm_import_region *i_reg) +{ + struct resource *memdev_res; + + memdev_res = setup_memdev_resource(i_reg->ubmem_res, i_reg->pa, + i_reg->region.mem_size, i_reg->region.regionid); + if (IS_ERR(memdev_res)) { + pr_err("memid=%d: failed to setup memdev resource: %pe\n", + i_reg->region.regionid, memdev_res); + return PTR_ERR(memdev_res); + } + + i_reg->memdev_res = memdev_res; + + return 0; +} + +static int teardown_iomem_resource(struct obmm_import_region *i_reg) +{ + int ret; + + ret = release_memdev_resource(i_reg->ubmem_res, i_reg->memdev_res); + if (ret) + pr_err("memid=%d: failed to release memdev resource: %pe\n", + i_reg->region.regionid, ERR_PTR(ret)); + + return ret; +} + static int prepare_import_memory(struct obmm_import_region *i_reg) { int ret, rollback_ret; @@ -217,8 +252,20 @@ static int prepare_import_memory(struct obmm_import_region *i_reg) i_reg->numa_id = NUMA_NO_NODE; } - return 0; + ret = setup_iomem_resource(i_reg); + if (ret) + goto out_teardown_numa; + return 0; +out_teardown_numa: + if (region_numa_remote(&i_reg->region)) { + rollback_ret = teardown_remote_numa(i_reg, true); + if (rollback_ret) { + pr_err("failed to teardown remote numa on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); + ret = -ENOTRECOVERABLE; + } + } out_teardown_pa: rollback_ret = teardown_pa(i_reg); if (rollback_ret) { @@ -240,6 +287,10 @@ static int release_import_memory(struct obmm_import_region *i_reg) { int ret, rollback_ret, old_numa_id; + ret = teardown_iomem_resource(i_reg); + if (ret) + return ret; + if (region_numa_remote(&i_reg->region)) { old_numa_id = i_reg->numa_id; ret = teardown_remote_numa(i_reg, false); @@ -284,6 +335,12 @@ static int release_import_memory(struct obmm_import_region *i_reg) } } err_teardown_numa: + rollback_ret = setup_iomem_resource(i_reg); + if (rollback_ret) { + pr_err("failed to restore iomem resource on rollback, ret=%pe.\n", + ERR_PTR(rollback_ret)); + return -ENOTRECOVERABLE; + } return ret; } -- Gitee From 8a278ed9a7512c9745fa2a2753b5c9a95b7e536d Mon Sep 17 00:00:00 2001 From: klmengkd Date: Mon, 12 Jan 2026 21:15:54 +0800 Subject: [PATCH 44/48] ubios_uvb: add check for cis message and senderid commit 878ed1dcd6341b5aae106e2a16c44443f0211d59 openEuler This patch add some checks for cis_message in cis call and poll thread process. And add check for senderid and receiverid can't be null. 
Signed-off-by: klmengkd --- .../firmware/ubios_uvb/cis/cis_info_process.c | 31 +++++++++++++++++++ .../firmware/ubios_uvb/cis/uvb_info_process.c | 6 ++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/ubios_uvb/cis/cis_info_process.c b/drivers/firmware/ubios_uvb/cis/cis_info_process.c index eb87fa528228..be43fbd27c67 100644 --- a/drivers/firmware/ubios_uvb/cis/cis_info_process.c +++ b/drivers/firmware/ubios_uvb/cis/cis_info_process.c @@ -568,6 +568,26 @@ int cis_call_remote(u32 call_id, u32 sender_id, u32 receiver_id, return cis_call_uvb(index, &para); } +static bool check_msg_vaild(struct cis_message *msg) +{ + if (!msg) + return false; + + if (msg->input && !msg->input_size) + return false; + + if (!msg->input && msg->input_size) + return false; + + if (msg->output && (!msg->p_output_size || !*msg->p_output_size)) + return false; + + if (!msg->output && msg->p_output_size && *msg->p_output_size) + return false; + + return true; +} + /** * cis_call - Trigger a cis call with given aruguments. * @@ -590,6 +610,17 @@ int cis_call_by_uvb(u32 call_id, u32 sender_id, u32 receiver_id, pr_debug("cis call: call id %08x, sender id %08x, receiver id %08x\n", call_id, sender_id, receiver_id); + + if (!sender_id || !receiver_id) { + pr_err("senderid or receiverid can't be null\n") ; + return -EINVAL; + } + + if (!check_msg_vaild(msg)) { + pr_err("check cis message invalid\n"); + return -EINVAL; + } + if (cis_call_for_me(receiver_id) || cis_call_for_local(receiver_id)) { func = search_local_cis_func(call_id, receiver_id); if (func) { diff --git a/drivers/firmware/ubios_uvb/cis/uvb_info_process.c b/drivers/firmware/ubios_uvb/cis/uvb_info_process.c index 24f95982ad72..654ff02efabb 100644 --- a/drivers/firmware/ubios_uvb/cis/uvb_info_process.c +++ b/drivers/firmware/ubios_uvb/cis/uvb_info_process.c @@ -193,7 +193,7 @@ static void uvb_polling_window(struct uvb_window_description *wd) func = search_local_cis_func(message_id, receiver_id); if (func) { err = func(&msg); - if (!err && msg.output && *msg.p_output_size) + if (!err && msg.output && msg.p_output_size && *msg.p_output_size) window->output_data_checksum = checksum32(msg.output, *msg.p_output_size); } else { @@ -216,7 +216,7 @@ static void uvb_polling_window(struct uvb_window_description *wd) goto free_resources; } err = cis_call_remote(message_id, UBIOS_MY_USER_ID, receiver_id, &msg, false); - if (!err && msg.output && *msg.p_output_size) + if (!err && msg.output && msg.p_output_size && *msg.p_output_size) window->output_data_checksum = checksum32(msg.output, *msg.p_output_size); pr_info("cis call forward end\n"); @@ -271,7 +271,7 @@ static int uvb_polling_window_sync(struct uvb_window_description *wd) func = search_local_cis_func(message_id, receiver_id); if (func) { err = func(&msg); - if (!err && msg.output && *msg.p_output_size) + if (!err && msg.output && msg.p_output_size && *msg.p_output_size) window->output_data_checksum = checksum32(msg.output, *msg.p_output_size); if (err) -- Gitee From 04aa14015844c168d145708d82d353b346715dc8 Mon Sep 17 00:00:00 2001 From: klmengkd Date: Mon, 12 Jan 2026 21:44:34 +0800 Subject: [PATCH 45/48] ubios_uvb: add check for uvb window buffer size commit 08a67ffae461171b314f46d2ee8c39af23ffb273 openEuler This patch adds a check on the uvb window buffer size to prevent the input or output message from exceeding the window buffer, and aligns the output buffer.
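Two details in the hunks below, spelled out as a sketch (not part of the patch; the demo helpers are illustrative): widening both u32 operands to u64 keeps the size check itself from wrapping, and ALIGN() starts the output buffer on a natural 8-byte boundary after the input:

#include <linux/errno.h>
#include <linux/kernel.h>       /* ALIGN() */
#include <linux/types.h>

static int demo_window_fits(u64 wd_size, u32 input_size, u32 output_size)
{
        /* widen first: a u32 sum could wrap and defeat the check */
        if (wd_size < (u64)input_size + (u64)output_size)
                return -EOVERFLOW;
        return 0;
}

static u64 demo_output_offset(u32 input_size)
{
        /* output starts at the next 8-byte boundary after the input */
        return ALIGN(input_size, sizeof(u64));
}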
Signed-off-by: klmengkd --- drivers/firmware/ubios_uvb/cis/cis_info_process.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/ubios_uvb/cis/cis_info_process.c b/drivers/firmware/ubios_uvb/cis/cis_info_process.c index be43fbd27c67..eaa238af1680 100644 --- a/drivers/firmware/ubios_uvb/cis/cis_info_process.c +++ b/drivers/firmware/ubios_uvb/cis/cis_info_process.c @@ -265,12 +265,20 @@ static int fill_uvb_window_with_buffer(struct uvb_window_description *wd, window = window_address; if (output_size) { - if (wd->size < *output_size + input_size) + if (wd->size < (u64)*output_size + (u64)input_size) { + pr_err("check wd size failed for output size\n"); return -EOVERFLOW; + } window->output_data_size = *output_size; + } else { + window->output_data_size = UVB_OUTPUT_SIZE_NULL; } if (input) { + if (wd->size < input_size) { + pr_err("check wd size failed for input size\n"); + return -EOVERFLOW; + } new_input = memremap(wd->buffer, wd->size, MEMREMAP_WC); if (!new_input) { pr_err("memremap for wd_buffer_virt_addr failed\n"); @@ -281,7 +289,7 @@ ... } if (output) - new_output = (void *)(new_input + input_size); + new_output = (void *)(new_input + ALIGN(input_size, sizeof(u64))); io_params->input = new_input; io_params->input_size = input_size; @@ -290,7 +298,7 @@ window->input_data_address = new_input ? wd->buffer : 0; window->input_data_size = input_size; - window->output_data_address = new_output ? wd->buffer + input_size : 0; + window->output_data_address = new_output ? wd->buffer + ALIGN(input_size, sizeof(u64)) : 0; return 0; } -- Gitee From 03314f1590116ca47b7391668816896a065a4595 Mon Sep 17 00:00:00 2001 From: klmengkd Date: Mon, 12 Jan 2026 21:49:17 +0800 Subject: [PATCH 46/48] uvb: change dir name commit e440694adcf8b5ee8eab5914302130003d121a0e openEuler This patch changes the uvb directory name.
Signed-off-by: klmengkd --- drivers/firmware/Kconfig | 2 +- drivers/firmware/Makefile | 2 +- drivers/firmware/{ubios_uvb => uvb}/Kconfig | 0 drivers/firmware/{ubios_uvb => uvb}/Makefile | 2 +- drivers/firmware/{ubios_uvb => uvb}/cis/cis_core.c | 0 drivers/firmware/{ubios_uvb => uvb}/cis/cis_info_process.c | 0 drivers/firmware/{ubios_uvb => uvb}/cis/cis_info_process.h | 0 drivers/firmware/{ubios_uvb => uvb}/cis/uvb_info_process.c | 0 drivers/firmware/{ubios_uvb => uvb}/cis/uvb_info_process.h | 0 drivers/firmware/{ubios_uvb => uvb}/include/cis_uvb_interface.h | 2 +- drivers/firmware/{ubios_uvb => uvb}/odf/odf_data.c | 0 drivers/firmware/{ubios_uvb => uvb}/odf/odf_get_fdt.c | 0 drivers/firmware/{ubios_uvb => uvb}/odf/odf_handle.h | 0 drivers/firmware/{ubios_uvb => uvb}/odf/odf_helper.c | 0 drivers/firmware/{ubios_uvb => uvb}/odf/odf_interface.h | 0 drivers/firmware/{ubios_uvb => uvb}/odf/odf_trans.c | 0 drivers/ub/sentry/sentry_uvb_comm.c | 2 +- drivers/ub/sentry/smh_common_type.h | 2 +- include/linux/firmware/{ubios => uvb}/cis.h | 0 19 files changed, 6 insertions(+), 6 deletions(-) rename drivers/firmware/{ubios_uvb => uvb}/Kconfig (100%) rename drivers/firmware/{ubios_uvb => uvb}/Makefile (86%) rename drivers/firmware/{ubios_uvb => uvb}/cis/cis_core.c (100%) rename drivers/firmware/{ubios_uvb => uvb}/cis/cis_info_process.c (100%) rename drivers/firmware/{ubios_uvb => uvb}/cis/cis_info_process.h (100%) rename drivers/firmware/{ubios_uvb => uvb}/cis/uvb_info_process.c (100%) rename drivers/firmware/{ubios_uvb => uvb}/cis/uvb_info_process.h (100%) rename drivers/firmware/{ubios_uvb => uvb}/include/cis_uvb_interface.h (98%) rename drivers/firmware/{ubios_uvb => uvb}/odf/odf_data.c (100%) rename drivers/firmware/{ubios_uvb => uvb}/odf/odf_get_fdt.c (100%) rename drivers/firmware/{ubios_uvb => uvb}/odf/odf_handle.h (100%) rename drivers/firmware/{ubios_uvb => uvb}/odf/odf_helper.c (100%) rename drivers/firmware/{ubios_uvb => uvb}/odf/odf_interface.h (100%) rename drivers/firmware/{ubios_uvb => uvb}/odf/odf_trans.c (100%) rename include/linux/firmware/{ubios => uvb}/cis.h (100%) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 68edaa3d6846..f970065c5290 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -314,6 +314,6 @@ source "drivers/firmware/psci/Kconfig" source "drivers/firmware/smccc/Kconfig" source "drivers/firmware/tegra/Kconfig" source "drivers/firmware/xilinx/Kconfig" -source "drivers/firmware/ubios_uvb/Kconfig" +source "drivers/firmware/uvb/Kconfig" endmenu diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index c60933fa11cb..defec11828e8 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -38,4 +38,4 @@ obj-y += psci/ obj-y += smccc/ obj-y += tegra/ obj-y += xilinx/ -obj-$(CONFIG_UDFI) += ubios_uvb/ +obj-$(CONFIG_UDFI) += uvb/ diff --git a/drivers/firmware/ubios_uvb/Kconfig b/drivers/firmware/uvb/Kconfig similarity index 100% rename from drivers/firmware/ubios_uvb/Kconfig rename to drivers/firmware/uvb/Kconfig diff --git a/drivers/firmware/ubios_uvb/Makefile b/drivers/firmware/uvb/Makefile similarity index 86% rename from drivers/firmware/ubios_uvb/Makefile rename to drivers/firmware/uvb/Makefile index ccb8b026c819..d5a261794d9e 100644 --- a/drivers/firmware/ubios_uvb/Makefile +++ b/drivers/firmware/uvb/Makefile @@ -2,7 +2,7 @@ # Create : 2025-04-18 # Description : cis odf Makefile -ccflags-y += -I$(srctree)/drivers/firmware/ubios_uvb/include +ccflags-y += -I$(srctree)/drivers/firmware/uvb/include 
obj-$(CONFIG_UDFI) = odf/odf_get_fdt.o obj-$(CONFIG_UDFI_ODF) += odf.o diff --git a/drivers/firmware/ubios_uvb/cis/cis_core.c b/drivers/firmware/uvb/cis/cis_core.c similarity index 100% rename from drivers/firmware/ubios_uvb/cis/cis_core.c rename to drivers/firmware/uvb/cis/cis_core.c diff --git a/drivers/firmware/ubios_uvb/cis/cis_info_process.c b/drivers/firmware/uvb/cis/cis_info_process.c similarity index 100% rename from drivers/firmware/ubios_uvb/cis/cis_info_process.c rename to drivers/firmware/uvb/cis/cis_info_process.c diff --git a/drivers/firmware/ubios_uvb/cis/cis_info_process.h b/drivers/firmware/uvb/cis/cis_info_process.h similarity index 100% rename from drivers/firmware/ubios_uvb/cis/cis_info_process.h rename to drivers/firmware/uvb/cis/cis_info_process.h diff --git a/drivers/firmware/ubios_uvb/cis/uvb_info_process.c b/drivers/firmware/uvb/cis/uvb_info_process.c similarity index 100% rename from drivers/firmware/ubios_uvb/cis/uvb_info_process.c rename to drivers/firmware/uvb/cis/uvb_info_process.c diff --git a/drivers/firmware/ubios_uvb/cis/uvb_info_process.h b/drivers/firmware/uvb/cis/uvb_info_process.h similarity index 100% rename from drivers/firmware/ubios_uvb/cis/uvb_info_process.h rename to drivers/firmware/uvb/cis/uvb_info_process.h diff --git a/drivers/firmware/ubios_uvb/include/cis_uvb_interface.h b/drivers/firmware/uvb/include/cis_uvb_interface.h similarity index 98% rename from drivers/firmware/ubios_uvb/include/cis_uvb_interface.h rename to drivers/firmware/uvb/include/cis_uvb_interface.h index bc9d5a858cce..513cb42372da 100644 --- a/drivers/firmware/ubios_uvb/include/cis_uvb_interface.h +++ b/drivers/firmware/uvb/include/cis_uvb_interface.h @@ -8,7 +8,7 @@ #ifndef CIS_UVB_INTERFACE_H #define CIS_UVB_INTERFACE_H -#include <linux/firmware/ubios/cis.h> +#include <linux/firmware/uvb/cis.h> /** * struct cis_group - call id service group diff --git a/drivers/firmware/ubios_uvb/odf/odf_data.c b/drivers/firmware/uvb/odf/odf_data.c similarity index 100% rename from drivers/firmware/ubios_uvb/odf/odf_data.c rename to drivers/firmware/uvb/odf/odf_data.c diff --git a/drivers/firmware/ubios_uvb/odf/odf_get_fdt.c b/drivers/firmware/uvb/odf/odf_get_fdt.c similarity index 100% rename from drivers/firmware/ubios_uvb/odf/odf_get_fdt.c rename to drivers/firmware/uvb/odf/odf_get_fdt.c diff --git a/drivers/firmware/ubios_uvb/odf/odf_handle.h b/drivers/firmware/uvb/odf/odf_handle.h similarity index 100% rename from drivers/firmware/ubios_uvb/odf/odf_handle.h rename to drivers/firmware/uvb/odf/odf_handle.h diff --git a/drivers/firmware/ubios_uvb/odf/odf_helper.c b/drivers/firmware/uvb/odf/odf_helper.c similarity index 100% rename from drivers/firmware/ubios_uvb/odf/odf_helper.c rename to drivers/firmware/uvb/odf/odf_helper.c diff --git a/drivers/firmware/ubios_uvb/odf/odf_interface.h b/drivers/firmware/uvb/odf/odf_interface.h similarity index 100% rename from drivers/firmware/ubios_uvb/odf/odf_interface.h rename to drivers/firmware/uvb/odf/odf_interface.h diff --git a/drivers/firmware/ubios_uvb/odf/odf_trans.c b/drivers/firmware/uvb/odf/odf_trans.c similarity index 100% rename from drivers/firmware/ubios_uvb/odf/odf_trans.c rename to drivers/firmware/uvb/odf/odf_trans.c diff --git a/drivers/ub/sentry/sentry_uvb_comm.c b/drivers/ub/sentry/sentry_uvb_comm.c index ef6af702263d..e7fef72e0670 100644 --- a/drivers/ub/sentry/sentry_uvb_comm.c +++ b/drivers/ub/sentry/sentry_uvb_comm.c @@ -6,7 +6,7 @@ * Create: 2025-04-23 */ -#include <linux/firmware/ubios/cis.h> +#include <linux/firmware/uvb/cis.h> #include #include #include diff --git a/drivers/ub/sentry/smh_common_type.h 
b/drivers/ub/sentry/smh_common_type.h index 2303afc2e529..069c04a23f08 100644 --- a/drivers/ub/sentry/smh_common_type.h +++ b/drivers/ub/sentry/smh_common_type.h @@ -9,7 +9,7 @@ #ifndef SMH_COMMON_TYPE_H #define SMH_COMMON_TYPE_H -#include <linux/firmware/ubios/cis.h> +#include <linux/firmware/uvb/cis.h> #include #include #include diff --git a/include/linux/firmware/ubios/cis.h b/include/linux/firmware/uvb/cis.h similarity index 100% rename from include/linux/firmware/ubios/cis.h rename to include/linux/firmware/uvb/cis.h -- Gitee From 86424ddb397a2655ed019b80cf78524417131324 Mon Sep 17 00:00:00 2001 From: tong_1001 Date: Tue, 13 Jan 2026 18:55:31 +0800 Subject: [PATCH 47/48] drivers/ub/sentry: release urma resources when the ub device is removed commit cac6f8fb89051aa2c5a60699fc56ca98d8ea144f openEuler Release urma resources when the ub device is removed, so that a removed device does not leave stale jetties, segments and completion queues behind. Signed-off-by: shixuantong Signed-off-by: guodashun --- drivers/ub/sentry/sentry_urma_comm.c | 217 +++++++++++++++------ 1 file changed, 121 insertions(+), 96 deletions(-) diff --git a/drivers/ub/sentry/sentry_urma_comm.c b/drivers/ub/sentry/sentry_urma_comm.c index 21ad57952e84..967a2f036541 100644 --- a/drivers/ub/sentry/sentry_urma_comm.c +++ b/drivers/ub/sentry/sentry_urma_comm.c @@ -220,12 +220,93 @@ static int compare_ubcore_eid(const union ubcore_eid src_eid, new_src_eid.in4.addr); return 0; } - pr_err("match eid failed, src eid:%llx, %x, %x, dst eid: %llx, %x, %x\n", - src_eid.in4.reserved, src_eid.in4.prefix, src_eid.in4.addr, - dst_eid.in4.reserved, dst_eid.in4.prefix, dst_eid.in4.addr); return -EINVAL; } + +/** + * unimport_tjetty - Unimport all target jetties for a specific die + * @die_index: Index of the die to unimport jetties from + * + * Return: 0 on success, -EINVAL on invalid die_index + * + * This function unimports all target jetties associated with a specific die + * index and cleans up the references. 
+ */ +static int unimport_tjetty(int die_index) +{ + int i; + + if (die_index < 0 || die_index >= MAX_DIE_NUM) { + pr_err("invalid die_index (%d), range is [0, %d]\n", + die_index, MAX_DIE_NUM - 1); + return -EINVAL; + } + + for (i = 0; i < MAX_NODE_NUM; i++) { + if (sentry_urma_dev[die_index].tjetty[i]) { + ubcore_unimport_jetty(sentry_urma_dev[die_index].tjetty[i]); + sentry_urma_dev[die_index].tjetty[i] = NULL; + } + } + + return 0; +} + + +static void release_urma_dev_source(int die_index) +{ + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_info("urma %d dev does not exist, skip releasing urma resources\n", die_index); + return; + } + + unimport_tjetty(die_index); + + if (sentry_urma_dev[die_index].jetty) { + ubcore_delete_jetty(sentry_urma_dev[die_index].jetty); + sentry_urma_dev[die_index].jetty = NULL; + } + + if (sentry_urma_dev[die_index].s_seg) { + ubcore_unregister_seg(sentry_urma_dev[die_index].s_seg); + sentry_urma_dev[die_index].s_seg = NULL; + kfree(sentry_urma_dev[die_index].s_seg_va); + sentry_urma_dev[die_index].s_seg_va = NULL; + } + + if (sentry_urma_dev[die_index].r_seg) { + ubcore_unregister_seg(sentry_urma_dev[die_index].r_seg); + sentry_urma_dev[die_index].r_seg = NULL; + kfree(sentry_urma_dev[die_index].r_seg_va); + sentry_urma_dev[die_index].r_seg_va = NULL; + } + + if (sentry_urma_dev[die_index].jetty_jfr) { + ubcore_delete_jfr(sentry_urma_dev[die_index].jetty_jfr); + sentry_urma_dev[die_index].jetty_jfr = NULL; + } + + if (sentry_urma_dev[die_index].receiver_jfc) { + ubcore_delete_jfc(sentry_urma_dev[die_index].receiver_jfc); + sentry_urma_dev[die_index].receiver_jfc = NULL; + } + + if (sentry_urma_dev[die_index].sender_jfc) { + ubcore_delete_jfc(sentry_urma_dev[die_index].sender_jfc); + sentry_urma_dev[die_index].sender_jfc = NULL; + } + + sentry_urma_dev[die_index].sentry_ubcore_dev = NULL; + sentry_urma_dev[die_index].is_created = false; + + sentry_urma_dev[die_index].server_eid_valid_num = 0; + memset(&sentry_urma_dev[die_index].local_eid, 0, sizeof(sentry_urma_dev[die_index].local_eid)); + memset(sentry_urma_dev[die_index].server_eid, 0, sizeof(sentry_urma_dev[die_index].server_eid)); + memset(sentry_urma_dev[die_index].server_eid_array, 0, MAX_NODE_NUM * EID_MAX_LEN * sizeof(char)); +} + /** * sentry_add_device - Add URMA device to the device list * @dev: URMA device to add @@ -263,14 +344,24 @@ static int sentry_add_device(struct ubcore_device *dev) static void sentry_remove_device(struct ubcore_device *dev, void *d __always_unused) { struct ubcore_dev_list *dev_node; + int die_index = 0; + urma_mutex_lock_op(URMA_LOCK); list_for_each_entry(dev_node, &ub_dev_list_head, list) { if (dev_node->dev == dev) { + for (die_index = 0; die_index < MAX_DIE_NUM; die_index++) { + if (sentry_urma_dev[die_index].sentry_ubcore_dev == dev) { + pr_info("release urma %d dev resources before removing the urma device\n", die_index); + release_urma_dev_source(die_index); + break; + } + } list_del(&dev_node->list); kfree(dev_node); break; } } + urma_mutex_lock_op(URMA_UNLOCK); } static struct ubcore_client sentry_ubcore_client = { @@ -313,34 +404,6 @@ void free_global_char(void) sentry_urma_ctx.urma_recv_sender_cr = NULL; } -/** - * unimport_tjetty - Unimport all target jetties for a specific die - * @die_index: Index of the die to unimport jetties from - * - * Return: 0 on success, -EINVAL on invalid die_index - * - * This function unimports all target jetties associated with a specific die - * index and cleans up the references. 
- */ -static int unimport_tjetty(int die_index) -{ - int i; - - if (die_index < 0 || die_index >= MAX_DIE_NUM) { - pr_err("invalid die_index (%d), range is [0, %d]\n", - die_index, MAX_DIE_NUM - 1); - return -EINVAL; - } - - for (i = 0; i < MAX_NODE_NUM; i++) { - if (sentry_urma_dev[die_index].tjetty[i]) { - ubcore_unimport_jetty(sentry_urma_dev[die_index].tjetty[i]); - sentry_urma_dev[die_index].tjetty[i] = NULL; - } - } - - return 0; -} /** * init_global_char - Initialize global character buffers @@ -445,6 +508,7 @@ int init_ubcore(void) return ret; } + /** * release_ubcore_resource - Release all URMA resources for all dies * @@ -468,44 +532,7 @@ static void release_ubcore_resource(void) /* Release resources for each die */ for (die_index = 0; die_index < MAX_DIE_NUM; die_index++) { - unimport_tjetty(die_index); - - if (sentry_urma_dev[die_index].jetty) { - ubcore_delete_jetty(sentry_urma_dev[die_index].jetty); - sentry_urma_dev[die_index].jetty = NULL; - } - - if (sentry_urma_dev[die_index].s_seg) { - ubcore_unregister_seg(sentry_urma_dev[die_index].s_seg); - sentry_urma_dev[die_index].s_seg = NULL; - kfree(sentry_urma_dev[die_index].s_seg_va); - sentry_urma_dev[die_index].s_seg_va = NULL; - } - - if (sentry_urma_dev[die_index].r_seg) { - ubcore_unregister_seg(sentry_urma_dev[die_index].r_seg); - sentry_urma_dev[die_index].r_seg = NULL; - kfree(sentry_urma_dev[die_index].r_seg_va); - sentry_urma_dev[die_index].r_seg_va = NULL; - } - - if (sentry_urma_dev[die_index].jetty_jfr) { - ubcore_delete_jfr(sentry_urma_dev[die_index].jetty_jfr); - sentry_urma_dev[die_index].jetty_jfr = NULL; - } - - if (sentry_urma_dev[die_index].receiver_jfc) { - ubcore_delete_jfc(sentry_urma_dev[die_index].receiver_jfc); - sentry_urma_dev[die_index].receiver_jfc = NULL; - } - - if (sentry_urma_dev[die_index].sender_jfc) { - ubcore_delete_jfc(sentry_urma_dev[die_index].sender_jfc); - sentry_urma_dev[die_index].sender_jfc = NULL; - } - - sentry_urma_dev[die_index].sentry_ubcore_dev = NULL; - sentry_urma_dev[die_index].is_created = false; + release_urma_dev_source(die_index); } urma_mutex_lock_op(URMA_UNLOCK); @@ -688,6 +715,11 @@ int sentry_post_recv(struct ubcore_jetty *r_jetty, struct ubcore_target_seg *rec return -EINVAL; } + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("%s failed: urma %d dev does not exist\n", __func__, die_index); + return -EINVAL; + } + sge_addr = (uint64_t)sentry_urma_dev[die_index].r_seg_va + SGE_MAX_LEN * node_idx; sentry_urma_dev[die_index].r_sge[node_idx].addr = sge_addr; sentry_urma_dev[die_index].r_sge[node_idx].len = SGE_MAX_LEN; @@ -834,6 +866,12 @@ static struct ubcore_tjetty *create_tjetty(struct ubcore_tjetty_cfg *tjetty_cfg, int eid_index, int die_index) { int ret; + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("%s failed: urma %d dev does not exist\n", __func__, die_index); + return NULL; + } + struct ubcore_get_tp_cfg tp_cfg = { .flag.bs.ctp = 1, .trans_mode = UBCORE_TP_RM, @@ -1083,8 +1121,6 @@ EXPORT_SYMBOL(match_index_by_remote_ub_eid); int sentry_create_urma_resource(union ubcore_eid eid[], int eid_num) { int ret; - bool is_the_same = true; - union ubcore_eid initial_value = {0}; int i; /* Prepare for new device matching by cleaning up old resources */ @@ -1097,30 +1133,6 @@ int sentry_create_urma_resource(union ubcore_eid eid[], int eid_num) } pr_info("ubcore init success\n"); - /* Check if the current EID configuration is the same as previous */ - for (i = 0; i < MAX_DIE_NUM; i++) { - if (memcmp(&eid[i], &initial_value, sizeof(union 
ubcore_eid)) == 0) - break; - - /* - * Settings are considered changed in two scenarios: - * 1. This is a new setting (no previous value exists). - * 2. A previous value exists, but the new value is different. - */ - if ((sentry_urma_dev[i].is_created && - memcmp(&sentry_urma_dev[i].local_eid, &eid[i], - sizeof(union ubcore_eid)) != 0) || - !sentry_urma_dev[i].is_created) { - is_the_same = false; - break; - } - } - - if (is_the_same) { - pr_info("New eid is the same with current eid, skip to create new resource\n"); - return 0; - } - /* Create resources for each EID */ for (i = 0; i < eid_num; i++) { sentry_urma_dev[i].sentry_ubcore_dev = @@ -1604,7 +1616,7 @@ static int heartbeat_thread(void *arg) /* Verify rebuilt connections */ if (rebuilt) { msleep_interruptible(HB_WAIT_ACK_SLEEP_MS); - memset(sentry_urma_ctx.heartbeat_thread_cr, 0, sizeof(sentry_urma_ctx.heartbeat_thread_cr)); + memset(sentry_urma_ctx.heartbeat_thread_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM); if (!sentry_urma_ctx.is_panic_mode && !mutex_trylock(&sentry_urma_mutex)) @@ -1730,6 +1742,12 @@ static int rebuild_tjetty(int idx, int die_index) { struct ubcore_tjetty *tjetty_tmp = NULL; struct ubcore_tjetty *tjetty_to_clear = NULL; + + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("%s failed: urma %d dev does not exist\n", __func__, die_index); + return -EINVAL; + } + struct ubcore_tjetty_cfg cfg = { .id.id = sentry_urma_ctx.client_jetty_id, .id.eid = sentry_urma_dev[die_index].server_eid[idx], @@ -1858,6 +1876,12 @@ static int sentry_post_jetty_send_wr(const char *buf, size_t len, int tjetty_idx return 0; } + if (!sentry_urma_dev[die_index].sentry_ubcore_dev) { + pr_err("%s failed: urma %d dev does not exist\n", __func__, die_index); + urma_mutex_lock_op(URMA_UNLOCK); + return -EINVAL; + } + tj_i = sentry_urma_dev[die_index].tjetty[tjetty_idx]; if (!sentry_urma_dev[die_index].jetty) { @@ -2087,6 +2111,7 @@ int urma_recv(char **buf_arr, size_t len) /* Check each die for incoming messages */ for (die_index = 0; die_index < sentry_urma_ctx.local_eid_num_configured; die_index++) { int cnt; + memset(sentry_urma_ctx.urma_recv_cr, 0, sizeof(struct ubcore_cr) * MAX_NODE_NUM); if (!sentry_urma_ctx.is_panic_mode && -- Gitee From 5bd82f00ee6a7ecb841f7c6538c3f2d92e770a26 Mon Sep 17 00:00:00 2001 From: klmengkd Date: Fri, 23 Jan 2026 18:12:45 +0800 Subject: [PATCH 48/48] uvb: change cis call default poll timeout commit 1c8fc3c202852934de867c572b6bf026c6b05490 openEuler Change the default CIS call poll timeout from 1000ms to 1200ms and make it tunable at runtime via the uvb_poll_timeout module parameter. 
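Since module_param() registers uvb_poll_timeout with mode 0644, the timeout can be supplied at load time or adjusted afterwards through sysfs. Illustrative usage, assuming the CIS framework is built as a module named cis (the actual module name may differ):

  modprobe cis uvb_poll_timeout=2000
  echo 2000 > /sys/module/cis/parameters/uvb_poll_timeout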
Signed-off-by: klmengkd --- drivers/firmware/uvb/cis/cis_core.c | 2 +- drivers/firmware/uvb/cis/cis_info_process.c | 9 +++++++-- drivers/firmware/uvb/cis/cis_info_process.h | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/uvb/cis/cis_core.c b/drivers/firmware/uvb/cis/cis_core.c index d1e5938b306c..4aa2d858ac67 100644 --- a/drivers/firmware/uvb/cis/cis_core.c +++ b/drivers/firmware/uvb/cis/cis_core.c @@ -136,7 +136,7 @@ void free_global_vars(void) void uninit_uvb(void) { uvb_poll_window_thread_stop(); - msleep(UVB_POLL_TIMEOUT); + msleep(1000); /* keep the original 1000ms settle time; UVB_POLL_TIMEOUT is now 1200ms */ free_uvb_window_lock(); } diff --git a/drivers/firmware/uvb/cis/cis_info_process.c b/drivers/firmware/uvb/cis/cis_info_process.c index eaa238af1680..d0811b2bd9fe 100644 --- a/drivers/firmware/uvb/cis/cis_info_process.c +++ b/drivers/firmware/uvb/cis/cis_info_process.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "[UVB]: " fmt #include +#include #include #include #include @@ -19,6 +20,10 @@ #include "cis_info_process.h" #include "uvb_info_process.h" +static u32 uvb_poll_timeout = UVB_POLL_TIMEOUT; +module_param(uvb_poll_timeout, uint, 0644); +MODULE_PARM_DESC(uvb_poll_timeout, "set uvb poll timeout (ms), default 1200"); + LIST_HEAD(g_local_cis_list); DEFINE_SPINLOCK(cis_register_lock); struct cis_message *io_param_sync; @@ -341,7 +346,7 @@ int uvb_poll_window_call(struct uvb_window *window, u32 call_id) now = ktime_get(); time_interval = ktime_to_ms(ktime_sub(now, start)); - if (time_interval > UVB_POLL_TIMEOUT) + if (time_interval > uvb_poll_timeout) break; } @@ -355,7 +360,7 @@ int uvb_poll_window_call_sync(struct uvb_window *window, u32 call_id) int i; pr_info("start uvb window polling\n"); - for (i = 0; i < UVB_POLL_TIMEOUT_TIMES; i++) { + for (i = 0; i < uvb_poll_timeout * 10; i++) { if (window->message_id == ~call_id) return (int)window->returned_status; diff --git a/drivers/firmware/uvb/cis/cis_info_process.h b/drivers/firmware/uvb/cis/cis_info_process.h index ad2ed2467fda..4713e5306d77 100644 --- a/drivers/firmware/uvb/cis/cis_info_process.h +++ b/drivers/firmware/uvb/cis/cis_info_process.h @@ -15,8 +15,8 @@ #define CIS_USAGE_UVB 2 #define MAX_UVB_LOCK_IN_BITS 8 #define UVB_POLL_TIME_INTERVAL (100) /* 100us */ -#define UVB_POLL_TIMEOUT (1000) /* 1000ms */ -#define UVB_TIMEOUT_WINDOW_OBTAIN (10000) /* 10000ms */ +#define UVB_POLL_TIMEOUT (1200) /* 1200ms */ +#define UVB_TIMEOUT_WINDOW_OBTAIN (10000) /* 10000us */ #define UVB_POLL_TIMEOUT_TIMES (10000) /* 10000 times */ extern struct cis_message *io_param_sync; -- Gitee
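For reference, the two poll paths touched above share one pattern: uvb_poll_window_call() measures elapsed wall time with ktime_get()/ktime_to_ms() against uvb_poll_timeout, while uvb_poll_window_call_sync() bounds a fixed-step loop. Assuming each sync iteration delays UVB_POLL_TIME_INTERVAL (100us), uvb_poll_timeout * 10 iterations span uvb_poll_timeout milliseconds, matching the ktime path. Below is a minimal self-contained sketch of the ktime-based pattern; poll_window_sketch and the done callback are illustrative names, not the driver's API:

#include <linux/types.h>
#include <linux/ktime.h>
#include <linux/delay.h>
#include <linux/errno.h>

/*
 * Poll an arbitrary condition in ~100us steps until it holds or
 * timeout_ms milliseconds elapse. Sketch only: the driver itself
 * compares window->message_id against ~call_id instead of calling
 * back, and uses UVB_POLL_TIME_INTERVAL as the step.
 */
static int poll_window_sketch(bool (*done)(void *), void *arg, u32 timeout_ms)
{
	ktime_t start = ktime_get();

	while (!done(arg)) {
		usleep_range(100, 200);
		if (ktime_to_ms(ktime_sub(ktime_get(), start)) > timeout_ms)
			return -ETIMEDOUT;
	}
	return 0;
}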