From adf98177bd7557b61f5aed0789eb638279044edc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=8E=A2=E7=85=A7=E7=81=AF?= <8536419+qq1329009851@user.noreply.gitee.com>
Date: Fri, 14 Mar 2025 07:40:53 +0000
Subject: [PATCH] [Huawei] add ras_base_alloc.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Offering:BeiMing 25.0.RC1

Signed-off-by: 探照灯 <8536419+qq1329009851@user.noreply.gitee.com>
---
 orte/mca/ras/base/ras_base_allocate.c | 100 +++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/orte/mca/ras/base/ras_base_allocate.c b/orte/mca/ras/base/ras_base_allocate.c
index 382a87c0f5..90f264a788 100644
--- a/orte/mca/ras/base/ras_base_allocate.c
+++ b/orte/mca/ras/base/ras_base_allocate.c
@@ -104,6 +104,77 @@ void orte_ras_base_display_alloc(void)
     free(tmp);
 }
 
+/*
+ * Read the allocation file at alloc_path and append one orte_node_t per
+ * line to nodes.
+ *
+ * Each line must hold three whitespace-separated fields:
+ *     <hostname> <integer> <slots>
+ * The middle integer has to be present, but its value is not used here.
+ * Reading stops at the first malformed line; nodes appended before that
+ * point stay on the list. If the file cannot be opened, nodes is left
+ * untouched.
+ */
+static void get_alloc(char *alloc_path, opal_list_t *nodes)
+{
+    FILE *fp = fopen(alloc_path, "r");
+    if (NULL == fp) {
+        return;
+    }
+
+    char *line = NULL;
+    size_t len = 0;
+    while (-1 != getline(&line, &len, fp)) {
+        int name_beg = 0;
+        int name_end = 0;
+        int slots = 0;
+
+        /* %n records where the hostname starts and ends inside line, so an
+         * over-long name can be rejected instead of overflowing a fixed-size
+         * buffer. Only the final %d is an assigning conversion, so a
+         * well-formed line makes sscanf return 1. */
+        if (1 != sscanf(line, " %n%*s%n %*d %d", &name_beg, &name_end, &slots)) {
+            opal_output_verbose(10, orte_ras_base_framework.framework_output,
+                                "ras/donau: Get the wrong num of params in CCS_ALLOC_FILE");
+            break;
+        }
+        if (name_end - name_beg >= DONAU_MAX_NODENAME_LENGTH) {
+            opal_output_verbose(10, orte_ras_base_framework.framework_output,
+                                "ras/donau: hostname too long in CCS_ALLOC_FILE");
+            break;
+        }
+        /* the numeric fields are already parsed, so the hostname can be
+         * terminated in place inside the getline() buffer */
+        line[name_end] = '\0';
+
+        orte_node_t *node = OBJ_NEW(orte_node_t);
+        if (NULL == node) {
+            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+            break;
+        }
+        node->name = strdup(line + name_beg);
+        if (NULL == node->name) {
+            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+            OBJ_RELEASE(node);
+            break;
+        }
+        // Strip off the FQDN if present, ignore IP addresses
+        if (!orte_keep_fqdn_hostnames && !opal_net_isaddr(node->name)) {
+            char *ptr = strchr(node->name, '.');
+            if (NULL != ptr) {
+                *ptr = '\0';
+            }
+        }
+        node->state = ORTE_NODE_STATE_UP;
+        node->slots_inuse = 0;
+        node->slots_max = 0;
+        node->slots = slots;
+        opal_list_append(nodes, &node->super);
+    }
+    free(line);
+    fclose(fp);
+}
+
 /*
  * Function for selecting one component from all those that are
  * available.
@@ -376,6 +417,35 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata) goto DISPLAY; } + char *alloc_path = NULL; + if (NULL != (alloc_path = getenv("CCS_ALLOC_FILE")) && DONAU_SSH == orte_donau_launch_type) { + get_alloc(alloc_path, &nodes); + } + + /* if something was found in the hostfiles(s), we use that as our global + * pool - set it and we are done + */ + if (!opal_list_is_empty(&nodes)) { + /* flag that the allocation is managed */ + orte_managed_allocation = true; + /* since it is managed, we do not attempt to resolve + * the nodenames */ + opal_if_do_not_resolve = true; + /* store the results in the global resource pool - this removes the + * list items + */ + if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&nodes); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + /* cleanup */ + OBJ_DESTRUCT(&nodes); + goto DISPLAY; + } + /* if nothing was found so far, then look for a default hostfile */ if (NULL != orte_default_hostfile) { OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, -- Gitee