diff --git a/msmonitor/dynolog_npu/cli/Cargo.toml b/msmonitor/dynolog_npu/cli/Cargo.toml index 5b5917add32ae098b092eda4fc127234b5624655..6b2a8eb3d300bd3454813f6f1564b5eafc3fcb80 100644 --- a/msmonitor/dynolog_npu/cli/Cargo.toml +++ b/msmonitor/dynolog_npu/cli/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" [dependencies] anyhow = "1.0.57" -clap = { version = "3.1.0", features = ["derive"]} +clap = { version = "4.4", features = ["derive"]} serde_json = "1.0" rustls = "0.21.0" rustls-pemfile = "1.0" @@ -18,16 +18,6 @@ num-bigint = "0.4" openssl = { version = "0.10", features = ["vendored"] } rpassword = "7.2.0" -[net] -git-fetch-with-cli = true - -[build] -rustflags = [ - "-C", "relocation_model=pie", - "-C", "link-args=-Wl,-z,now", - "-C", "link-args=-Wl,-z,relro", - "-C", "strip=symbols", - "-C", "overflow_checks", - "-C", "link-args=-static-libgcc", - "-C", "link-args=-static-libstdc++" -] +[features] +default = ["openssl"] +openssl = [] diff --git a/msmonitor/dynolog_npu/cli/src/main.rs b/msmonitor/dynolog_npu/cli/src/main.rs index 2bd85a79637691fe3adbba544d83cc8e991e690f..6df6dfb19eae5a4d5423e96d5d18f3aacfd03216 100644 --- a/msmonitor/dynolog_npu/cli/src/main.rs +++ b/msmonitor/dynolog_npu/cli/src/main.rs @@ -3,7 +3,7 @@ // This source code is licensed under the MIT license found in the // LICENSE file in the root directory of this source tree. use std::fs::File; -use std::io::BufReader; +use std::io::{BufReader, Read}; use rustls::{Certificate, RootCertStore, PrivateKey, ClientConnection, StreamOwned}; use std::sync::Arc; use std::net::TcpStream; @@ -23,7 +23,6 @@ use x509_parser::public_key::RSAPublicKey; use x509_parser::der_parser::oid; use num_bigint::BigUint; use openssl::pkey::PKey; -use std::io::Read; // Make all the command modules accessible to this file. mod commands; @@ -55,14 +54,15 @@ const DYNO_PORT: u16 = 1778; const MIN_RSA_KEY_LENGTH: u64 = 3072; // 最小 RSA 密钥长度(位) #[derive(Debug, Parser)] +#[command(author, version, about, long_about = None)] struct Opts { - #[clap(long, default_value = "localhost")] + #[arg(long, default_value = "localhost")] hostname: String, - #[clap(long, default_value_t = DYNO_PORT)] + #[arg(long, default_value_t = DYNO_PORT)] port: u16, - #[clap(long, required = true)] + #[arg(long, required = true)] certs_dir: String, - #[clap(subcommand)] + #[command(subcommand)] cmd: Command, } @@ -112,44 +112,44 @@ enum Command { /// Capture gputrace Gputrace { /// Job id of the application to trace. - #[clap(long, default_value_t = 0)] + #[arg(long, default_value_t = 0)] job_id: u64, /// List of pids to capture trace for (comma separated). - #[clap(long, default_value = "0")] + #[arg(long, default_value = "0")] pids: String, /// Duration of trace to collect in ms. - #[clap(long, default_value_t = 500)] + #[arg(long, default_value_t = 500)] duration_ms: u64, /// Training iterations to collect, this takes precedence over duration. - #[clap(long, default_value_t = -1)] + #[arg(long, default_value_t = -1)] iterations: i64, /// Log file for trace. - #[clap(long)] + #[arg(long)] log_file: String, /// Unix timestamp used for synchronized collection (milliseconds since epoch). - #[clap(long, default_value_t = 0)] + #[arg(long, default_value_t = 0)] profile_start_time: u64, /// Start iteration roundup, starts an iteration based trace at a multiple /// of this value. - #[clap(long, default_value_t = 1)] + #[arg(long, default_value_t = 1)] profile_start_iteration_roundup: u64, /// Max number of processes to profile. - #[clap(long, default_value_t = 3)] + #[arg(long, default_value_t = 3)] process_limit: u32, /// Record PyTorch operator input shapes and types. - #[clap(long, action)] + #[arg(long)] record_shapes: bool, /// Profile PyTorch memory. - #[clap(long, action)] + #[arg(long)] profile_memory: bool, /// Capture Python stacks in traces. - #[clap(long, action)] + #[arg(long)] with_stacks: bool, /// Annotate operators with analytical flops. - #[clap(long, action)] + #[arg(long)] with_flops: bool, /// Capture PyTorch operator modules in traces. - #[clap(long, action)] + #[arg(long)] with_modules: bool, }, /// Capture nputrace. Subcommand functions aligned with Ascend Torch Profiler. diff --git a/msmonitor/scripts/build.sh b/msmonitor/scripts/build.sh index 8ee66edc1b8f940a72deaf8737f95ddf46d4a9c6..d51d2191334e49f2752e08f94182ab55f163ee51 100644 --- a/msmonitor/scripts/build.sh +++ b/msmonitor/scripts/build.sh @@ -4,6 +4,29 @@ export BUILD_PROMETHEUS=0 export BUILD_TENSORBOARD=1 export USE_TENSORBOARD="OFF" +# 设置 CARGO_HOME +export CARGO_HOME="/root/.cargo" + +# 创建 Cargo 配置目录 +mkdir -p ${CARGO_HOME} + +# 创建 config.toml(安全编译选项) +cat > ${CARGO_HOME}/config.toml << EOF +[net] +git-fetch-with-cli = true + +[build] +rustflags = [ + "-C", "relocation_model=pie", + "-C", "link-args=-Wl,-z,now", + "-C", "link-args=-Wl,-z,relro", + "-C", "strip=symbols", + "-C", "overflow_checks", + "-C", "link-args=-static-libgcc", + "-C", "link-args=-static-libstdc++" +] +EOF + check_gcc_version() { if ! command -v gcc >/dev/null 2>&1; then echo "ERROR: gcc command not found"