diff --git a/tutorials/source_en/debug/profiler.md b/tutorials/source_en/debug/profiler.md index 8fd491f14209808cffd6db471afee3bda2812606..34b5508d0904ba58e8133096093320355cb41474 100644 --- a/tutorials/source_en/debug/profiler.md +++ b/tutorials/source_en/debug/profiler.md @@ -411,3 +411,196 @@ schedule configuration related parameters have 5 parameters: wait, warmup, activ #### schedule and step Configuration Mismatch Problem Normally, the schedule configuration should be less than the number of model training times, that is, repeat*(wait+warmup+active)+skip_first should be less than the number of model training times. If the schedule configuration is greater than the number of model training times, Profiler will throw an exception warning, but this will not interrupt the model training, but there may be incomplete data collection and analysis. + +### Configuration Problem When Passing osrt to host_sys + +Currently, when osrt is passed to the host_sys parameter, the third-party perf tool must be installed. For installation details, please refer to [Installation of perf tools](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0136.html). After the third-party tool is installed, user permissions must be configured. The procedure is the same as in [Configuring user permissions](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0137.html#ZH-CN_TOPIC_0000002240286638); just replace msprof_data_collection.sh with the following: + +```bash + #!/bin/bash + # This script is used to run perf/iotop/ltrace by profiling. 
+ + command_type=$1 + command_param=$2 + script_dir="/usr/bin" + script_name="$(basename "$0")" + reg_int='^[1-9][0-9]{,6}$|^0$' + + function get_version(){ + if [ "${command_param}" = "perf" ] || [ "${command_param}" = "ltrace" ] || [ "${command_param}" = "iotop" ]; then + "${command_param}" --version + else + printf "The value of the second parameter is incorrect, please enter the correct parameter, " + printf "such as: perf, ltrace, iotop\n" + exit 1 + fi + } + + function kill_prof_cmd(){ + if [[ ${command_param} =~ ${reg_int} ]]; then + ppid=`ps -o ppid= -p ${command_param}` + ppid_user=$(ps -o uid -e -o pid | awk -va="${ppid}" '$2==a {print $1}') + shell_user=`id -u ${SUDO_USER}` + if [ "${ppid_user}" != "${shell_user}" ]; then + echo "UID of ${ppid} is:${ppid_user}, UID running this script is:${shell_user}" + exit 1 + fi + pidLine=`pstree -p ${command_param}` + pidLine=`echo $pidLine | awk 'BEGIN{ FS="(" ; RS=")" } NF>1 { print $NF }'` + for pid in $pidLine + do + sudo kill -2 ${pid} + done + exit 1 + else + echo "Input pid:${command_param} error" + exit 1 + fi + } + + # The user currently running this script should be consistent with the user to which the pid process belongs + function check_pid(){ + if [[ ! ${command_param} =~ ${reg_int} ]]; then + echo "Input pid:${command_param} error" + exit 1 + fi + params=$(cat /proc/sys/kernel/pid_max) + if [[ ! 
"$params" =~ ${reg_int} ]]; then + echo "Get max_pid error" + exit 1 + fi + if [ "${command_param}" -gt "${params}" ]; then + echo "Input pid:${command_param} gt pid_max:${params}" + exit 1 + fi + pid_user=$(ps -o uid -e -o pid | awk -va="${command_param}" '$2==a {print $1}') + shell_user=`id -u ${SUDO_USER}` + if [ "${pid_user}" != "${shell_user}" ]; then + echo "UID of ${command_param} is:${pid_user}, UID running this script is:${shell_user}" + exit 1 + fi + } + + function run_prof_trace_cmd(){ + check_pid + perf trace -T --syscalls -p "${command_param}" + } + + function run_ltrace_cmd(){ + check_pid + } + + function run_iotop_cmd(){ + check_pid + iotop -b -d 0.02 -P -t -p "${command_param}" + } + + function check_username(){ + echo "${command_param}" | grep -q -E '^[ 0-9a-zA-Z./:]*$' + result=$? + if [ "$result" -ne 0 ]; then + echo "Parameter:${command_param} is invalied!" + exit 1 + fi + if ! id -u "${command_param}" >/dev/null 2>&1 ; then + echo "User:${command_param} does not exist" + exit 1 + fi + } + + function get_cmd(){ + params=$(cat /proc/sys/kernel/pid_max) + if [[ ! "$params" =~ ${reg_int} ]]; then + echo "Get max_pid error" + exit 1 + fi + digits=1 + while ((${params}>10)); do + let "digits++" + ((params /= 10)) + done + compile='[1-9]' + arr[0]='[0-9]' + for((i=1;i /etc/sudoers.d/"${command_param}"_profiling + result=$? + if [ "$result" -ne 0 ]; then + echo "Set cmd to /etc/sudoers.d/${command_param}_profiling failed!" + exit 1 + else + echo "The user permission have been configured successfully. You can find the configuration file /etc/sudoers.d/${command_param}_profiling" + exit + fi + fi + has_add=$(cat /etc/sudoers|grep "${script_name}"|grep "${command_param}") + if [ "${has_add}" ]; then + echo "The configure already exist, please confirm its content is correct" + exit + fi + chmod u+w /etc/sudoers + result=$? 
+ if [ "$result" -ne 0 ]; then + echo "Permission configure failed" + exit 1 + fi + echo "${cmd}" >> /etc/sudoers + chmod u-w /etc/sudoers + echo "The user permission have been configured successfully. You can find the configuration file in the /etc/sudoers." + } + + function handle_sudoers(){ + check_username + get_cmd + set_sudoers + } + + function main(){ + if [ $# -ne 2 ]; then + echo "The number of parameters is incorrect, please enter two parameters" + exit 1 + fi + if [ "${command_type}" = "set-sudoers" ]; then + echo "Run set-sudoers cmd" + handle_sudoers + elif [ "${command_type}" = "get-version" ]; then + #echo "Run get-version cmd" + get_version + elif [ "${command_type}" = "kill" ]; then + #echo "kill cmd" + kill_prof_cmd + elif [ "${command_type}" = "perf" ]; then + #echo "run perf trace cmd" + run_prof_trace_cmd + elif [ "${command_type}" = "ltrace" ] ; then + #echo "run ltrace cmd" + run_ltrace_cmd + elif [ "${command_type}" = "iotop" ]; then + #echo "run iotop cmd" + run_iotop_cmd + else + printf "The value of the first parameter is incorrect, please enter the correct parameter, " + printf "such as: set-sudoers, get-version, kill, perf, ltrace, iotop\n" + exit 1 + fi + } + + main "$@" + ``` \ No newline at end of file diff --git a/tutorials/source_zh_cn/debug/profiler.md b/tutorials/source_zh_cn/debug/profiler.md index 0923219e9ef4728c05eb42b025d6a03d6aa6d597..f76e5ab273bce0b7a5a9c50ed7c72f8da389027f 100644 --- a/tutorials/source_zh_cn/debug/profiler.md +++ b/tutorials/source_zh_cn/debug/profiler.md @@ -431,3 +431,200 @@ schedule配置相关参数有5个:wait、warmup、active、repeat、skip_first #### schedule与step配置不匹配问题 正常来说schedule的配置应小于模型训练的次数,即repeat*(wait+warmup+active)+skip_first应小于模型训练的次数。如果schedule的配置大于模型训练的次数,Profiler会抛出异常警告,但这并不会打断模型训练,但可能存在采集解析的数据不全的情况。 + +### host_sys传入osrt参数配置问题 + +当前host_sys参数传入osrt时需要安装perf第三方工具,安装详情请参考[perf工具安装](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0136.html),安装完第三方工具后,需要配置用户权限 
+ +配置用户权限: + +- 该流程与[配置用户权限](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0137.html#ZH-CN_TOPIC_0000002240286638)过程一致,只需要把msprof_data_collection.sh替换为如下即可: + +```bash + #!/bin/bash + # This script is used to run perf/iotop/ltrace by profiling. + + command_type=$1 + command_param=$2 + script_dir="/usr/bin" + script_name="$(basename "$0")" + reg_int='^[1-9][0-9]{,6}$|^0$' + + function get_version(){ + if [ "${command_param}" = "perf" ] || [ "${command_param}" = "ltrace" ] || [ "${command_param}" = "iotop" ]; then + "${command_param}" --version + else + printf "The value of the second parameter is incorrect, please enter the correct parameter, " + printf "such as: perf, ltrace, iotop\n" + exit 1 + fi + } + + function kill_prof_cmd(){ + if [[ ${command_param} =~ ${reg_int} ]]; then + ppid=`ps -o ppid= -p ${command_param}` + ppid_user=$(ps -o uid -e -o pid | awk -va="${ppid}" '$2==a {print $1}') + shell_user=`id -u ${SUDO_USER}` + if [ "${ppid_user}" != "${shell_user}" ]; then + echo "UID of ${ppid} is:${ppid_user}, UID running this script is:${shell_user}" + exit 1 + fi + pidLine=`pstree -p ${command_param}` + pidLine=`echo $pidLine | awk 'BEGIN{ FS="(" ; RS=")" } NF>1 { print $NF }'` + for pid in $pidLine + do + sudo kill -2 ${pid} + done + exit 1 + else + echo "Input pid:${command_param} error" + exit 1 + fi + } + + #当前跑这个脚本的用户和pid进程所属的用户要一致 + function check_pid(){ + if [[ ! ${command_param} =~ ${reg_int} ]]; then + echo "Input pid:${command_param} error" + exit 1 + fi + params=$(cat /proc/sys/kernel/pid_max) + if [[ ! 
"$params" =~ ${reg_int} ]]; then + echo "Get max_pid error" + exit 1 + fi + if [ "${command_param}" -gt "${params}" ]; then + echo "Input pid:${command_param} gt pid_max:${params}" + exit 1 + fi + pid_user=$(ps -o uid -e -o pid | awk -va="${command_param}" '$2==a {print $1}') + shell_user=`id -u ${SUDO_USER}` + if [ "${pid_user}" != "${shell_user}" ]; then + echo "UID of ${command_param} is:${pid_user}, UID running this script is:${shell_user}" + exit 1 + fi + } + + function run_prof_trace_cmd(){ + check_pid + perf trace -T --syscalls -p "${command_param}" + } + + function run_ltrace_cmd(){ + check_pid + } + + function run_iotop_cmd(){ + check_pid + iotop -b -d 0.02 -P -t -p "${command_param}" + } + + function check_username(){ + echo "${command_param}" | grep -q -E '^[ 0-9a-zA-Z./:]*$' + result=$? + if [ "$result" -ne 0 ]; then + echo "Parameter:${command_param} is invalied!" + exit 1 + fi + if ! id -u "${command_param}" >/dev/null 2>&1 ; then + echo "User:${command_param} does not exist" + exit 1 + fi + } + + function get_cmd(){ + params=$(cat /proc/sys/kernel/pid_max) + if [[ ! "$params" =~ ${reg_int} ]]; then + echo "Get max_pid error" + exit 1 + fi + digits=1 + while ((${params}>10)); do + let "digits++" + ((params /= 10)) + done + compile='[1-9]' + arr[0]='[0-9]' + for((i=1;i /etc/sudoers.d/"${command_param}"_profiling + result=$? + if [ "$result" -ne 0 ]; then + echo "Set cmd to /etc/sudoers.d/${command_param}_profiling failed!" + exit 1 + else + echo "The user permission have been configured successfully. You can find the configuration file /etc/sudoers.d/${command_param}_profiling" + exit + fi + fi + has_add=$(cat /etc/sudoers|grep "${script_name}"|grep "${command_param}") + if [ "${has_add}" ]; then + echo "The configure already exist, please confirm its content is correct" + exit + fi + chmod u+w /etc/sudoers + result=$? 
+ if [ "$result" -ne 0 ]; then + echo "Permission configure failed" + exit 1 + fi + echo "${cmd}" >> /etc/sudoers + chmod u-w /etc/sudoers + echo "The user permission have been configured successfully. You can find the configuration file in the /etc/sudoers." + } + + function handle_sudoers(){ + check_username + get_cmd + set_sudoers + } + + function main(){ + if [ $# -ne 2 ]; then + echo "The number of parameters is incorrect, please enter two parameters" + exit 1 + fi + if [ "${command_type}" = "set-sudoers" ]; then + echo "Run set-sudoers cmd" + handle_sudoers + elif [ "${command_type}" = "get-version" ]; then + #echo "Run get-version cmd" + get_version + elif [ "${command_type}" = "kill" ]; then + #echo "kill cmd" + kill_prof_cmd + elif [ "${command_type}" = "perf" ]; then + #echo "run perf trace cmd" + run_prof_trace_cmd + elif [ "${command_type}" = "ltrace" ] ; then + #echo "run ltrace cmd" + run_ltrace_cmd + elif [ "${command_type}" = "iotop" ]; then + #echo "run iotop cmd" + run_iotop_cmd + else + printf "The value of the first parameter is incorrect, please enter the correct parameter, " + printf "such as: set-sudoers, get-version, kill, perf, ltrace, iotop\n" + exit 1 + fi + } + + main "$@" + ``` \ No newline at end of file