diff --git a/raw_mcp_py39.zip b/raw_mcp_py39.zip deleted file mode 100644 index effba50c7cae2fd4dca3f8da9b46511e80a0a243..0000000000000000000000000000000000000000 Binary files a/raw_mcp_py39.zip and /dev/null differ diff --git a/systrace/include/common/util.cc b/systrace/include/common/util.cc index 8fb61e4bdb607d9c719bd260826da8a31bb1e3ba..50184c8da383cc4ef610a3bc41b14d07acef8ef4 100644 --- a/systrace/include/common/util.cc +++ b/systrace/include/common/util.cc @@ -9,6 +9,7 @@ #include #include +extern pid_t g_hooked_pid; namespace systrace { namespace util @@ -54,7 +55,7 @@ std::string GenerateClusterUniqueFilename(const std::string &suffix) gethostname(hostname, sizeof(hostname)); std::ostringstream oss; oss << hostname << "--" << std::setw(5) << std::setfill('0') - << config::GlobalConfig::Instance().rank << suffix; + << config::GlobalConfig::Instance().rank << "--" << config::GlobalConfig::Instance().pid << suffix; return oss.str(); } catch (const std::exception &e) @@ -124,6 +125,7 @@ void LoadEnvironmentVariables() { return env::EnvVarRegistry::GetEnvVar(name); }; config.rank = loadInt("RANK") ? loadInt("RANK") : loadInt("RANK_ID"); + config.pid = g_hooked_pid; config.job_name = loadStr("ENV_ARGO_WORKFLOW_NAME"); config.local_rank = loadInt("LOCAL_RANK") ? loadInt("LOCAL_RANK") : loadInt("DEVICE_ID"); config.local_world_size = loadInt("LOCAL_WORLD_SIZE"); diff --git a/systrace/include/common/util.h b/systrace/include/common/util.h index ea166d535907e1c5cce42efc9c8dd0e603cabac4..0290edc180c954253ff3c1aa0922328c42037c8e 100644 --- a/systrace/include/common/util.h +++ b/systrace/include/common/util.h @@ -33,6 +33,7 @@ struct GlobalConfig uint32_t local_rank{0}; uint32_t world_size{0}; uint32_t local_world_size{0}; + pid_t pid{0}; std::string job_name; bool enable{true}; std::vector devices; diff --git a/systrace/src/ascend/hook.cc b/systrace/src/ascend/hook.cc index 01c55b1b7dbda196a969fe402c81702f63d54168..017f2f3af7ec98e364da466def81a70294202585 100644 --- a/systrace/src/ascend/hook.cc +++ b/systrace/src/ascend/hook.cc @@ -96,6 +96,7 @@ extern "C" EXPOSE_API aclError aclInit(const char *configPath) { + g_hooked_pid = getpid(); HOOKED_FUNCTION(orig_aclInit, "aclInit", configPath); } diff --git a/systrace/src/ascend/hook.h b/systrace/src/ascend/hook.h index e0e1f34a09edc35c80e27de14630f5bb7c2b5325..22d2a17ac102a8e6990cc779e8746adaecc72d8f 100644 --- a/systrace/src/ascend/hook.h +++ b/systrace/src/ascend/hook.h @@ -9,6 +9,7 @@ extern "C" { #endif + pid_t g_hooked_pid = 0; typedef int aclError; typedef void *aclrtStream; typedef void *aclrtFuncHandle; diff --git a/systrace/src/cann/common_hook.c b/systrace/src/cann/common_hook.c index 7a401a3a22e06cdd34073ec095031889220a4dc5..35f8e837882c4d79a363536215b2998adb4f5af0 100644 --- a/systrace/src/cann/common_hook.c +++ b/systrace/src/cann/common_hook.c @@ -39,17 +39,17 @@ void get_log_filename(char *buf, size_t buf_size, const char *path_suffix) { char path[PATH_LEN] = {0}; int ret = snprintf(path, sizeof(path), "%s/%s", SYS_TRACE_ROOT_DIR, path_suffix); if (ret < 0 || (size_t)ret >= sizeof(path)) { - snprintf(buf, buf_size, "%s_trace_rank%d.pb", path_suffix, rank); + snprintf(buf, buf_size, "%s_trace_rank%d_%d.pb", path_suffix, rank, g_hooked_pid); return; } if (access(path, F_OK) != 0) { if (mkdir(path, 0755) != 0 && errno != EEXIST) { perror("Failed to create directory"); - snprintf(buf, buf_size, "%s_trace_rank%d.pb", path_suffix, rank); + snprintf(buf, buf_size, "%s_trace_rank%d_%d.pb", path_suffix, rank, g_hooked_pid); return; } } - snprintf(buf, buf_size, "%s/%s_trace_rank%d.pb", path, path_suffix, rank); + snprintf(buf, buf_size, "%s/%s_trace_rank%d_%d.pb", path, path_suffix, rank, g_hooked_pid); } void *load_symbol(void *lib, const char *symbol_name) diff --git a/systrace/src/cann/common_hook.h b/systrace/src/cann/common_hook.h index 56b34dd4f6571f5874a6934023113f7c3e1c9e54..6a77f01cc0b8473b7ffec395e9cb3c7c6e2ee629 100644 --- a/systrace/src/cann/common_hook.h +++ b/systrace/src/cann/common_hook.h @@ -12,6 +12,7 @@ #include #include #include + #if defined(__aarch64__) #include "../../thirdparty/aarch64/libunwind/libunwind.h" #elif defined(__x86_64__) @@ -24,6 +25,7 @@ #define LOG_ITEMS_MIN 10 #define PATH_LEN 256 +extern pid_t g_hooked_pid; uint64_t get_current_us(); const char *get_so_name(uint64_t ip); unw_word_t get_so_base(unw_word_t addr); diff --git a/systrace/src/os/os_probe.c b/systrace/src/os/os_probe.c index 3aac1ee0363e00fdd51e181467d970531a55610b..a9636bb35c36ad4bf72375e59ab830e33afa0ab0 100644 --- a/systrace/src/os/os_probe.c +++ b/systrace/src/os/os_probe.c @@ -23,6 +23,7 @@ #include #include + #ifdef BPF_PROG_KERN #undef BPF_PROG_KERN #endif @@ -79,6 +80,7 @@ static pthread_mutex_t file_mutex = PTHREAD_MUTEX_INITIALIZER; int g_stop = 0; +extern pid_t g_hooked_pid; static pthread_key_t thread_data_key; static pthread_once_t key_once = PTHREAD_ONCE_INIT; static int rank; @@ -239,15 +241,15 @@ static void get_log_filename(time_t current, char *buf, if (mkdir(dir_path, 0755) != 0 && errno != EEXIST) { perror("Failed to create directory"); - snprintf(buf, buf_size, "os_trace_%04d%02d%02d_%02d_rank_%d.pb", + snprintf(buf, buf_size, "os_trace_%04d%02d%02d_%02d_rank_%d_%d.pb", tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, - tm->tm_hour, rank); + tm->tm_hour, rank, g_hooked_pid); return; } } - snprintf(buf, buf_size, "%s/os_trace_%04d%02d%02d_%02d_rank_%d.pb", + snprintf(buf, buf_size, "%s/os_trace_%04d%02d%02d_%02d_rank_%d_%d.pb", dir_path, tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, - tm->tm_hour, rank); + tm->tm_hour, rank, g_hooked_pid); } static char is_ready_to_write(OSprobe_ThreadData *td, time_t *current)