diff --git a/source/tools/detect/appscan/Makefile b/source/tools/detect/appscan/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..4649976efcbd8b85ce3005c64e2a2fa8b5327159
--- /dev/null
+++ b/source/tools/detect/appscan/Makefile
@@ -0,0 +1,5 @@
+target := appscan
+DEPEND := "prev{default modin};post{default modun}"
+
+include $(SRC)/mk/sh.mk
+
diff --git a/source/tools/detect/appscan/appscan.sh b/source/tools/detect/appscan/appscan.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d3e407dd3803d238854485952c057bd449066735
--- /dev/null
+++ b/source/tools/detect/appscan/appscan.sh
@@ -0,0 +1,816 @@
+#!/bin/bash
+
+pid=-1                                  # target process id from -p; -1 means "not given" (check_opts rejects it)
+
+CURDATE=$(date "+%Y-%m-%d-%H-%M-%S")    # start timestamp, becomes part of the per-run log directory name
+SLOGDIR="/var/log/sysak"
+WORKDIR="$(hostname)_appscan"
+LOGDIR="${SLOGDIR}/${WORKDIR}"          # mk_log_dir later appends pid_<pid>.<CURDATE>
+LOGFILE=""                              # filled in by mk_log_dir (<LOGDIR>/log)
+CPU_RESFILE="scan_cpu_result"           # bare file names; mk_log_dir prefixes them with LOGDIR
+MEM_RESFILE="scan_mem_result"
+IO_RESFILE="scan_io_result"
+PERFDIR=""                              # filled in by mk_log_dir (<LOGDIR>/perf)
+CURPATH="$SYSAK_WORK_PATH/tools"
+
+SCAN_CPU_PID=-1                         # PIDs of the background scan_* jobs, signalled by kill_scan_process
+SCAN_MEM_PID=-1
+SCAN_IO_PID=-1
+SCAN_NET_PID=-1                         # scan_net is not started yet, so this stays -1
+SCAN_TIME=-1                            # scan duration in seconds from -t; -1 means run until Ctrl-C
+
+#############################CPU result#############################
+SCAN_CPU_SUM=0
+THEAD_ID_ARR=()                         # thread ids; the arrays below are indexed in parallel with this one
+CPU_LOW_CNT_ARR=()                      # times total CPU usage was below CPU_RATIO_LOW_LVL
+CPU_SYS_HIGH_CNT_ARR=()                 # times sys CPU usage was above CPU_RATIO_HIGH_SYS_LVL
+CSWCH_HIGH_ARR=()                       # times voluntary context switches exceeded THREAD_CSWCH_HIGH_LVL
+NVCSWCH_HIGH_ARR=()                     # times involuntary context switches exceeded THREAD_NVCSWCH_HIGH_LVL
+CSWCH_SUM_ARR=()                        # number of context-switch checks performed for the thread
+CPU_SCAN_SUM_ARR=()                     # number of CPU scans performed for the thread
+
+############################Memory result############################
+SCAN_MEM_SUM=0                          # number of scan_mem iterations
+MEM_MINOR_PF_CNT=0                      # iterations with minor faults above PROCESS_MINOR_PG_FAULT_LVL
+MEM_MAJOR_PF_CNT=0                      # iterations with major faults above PROCESS_MAJOR_PG_FAULT_LVL
+MEM_LOW_WM_CNT=0                        # iterations in which the kswapd low-watermark hit counter grew
+MEM_HIGH_WM_CNT=0                       # iterations in which the kswapd high-watermark hit counter grew
+MEM_DIFF_NUMA_FLAG=0                    # 1 once process memory was seen on more than one NUMA node
+MEM_THREAD_RUN_DIFF_NUMA_FLAG=0         # 1 once a thread ran outside the NUMA node holding the memory
+
+##############################IO result##############################
+SCAN_IO_SUM=0                           # number of scan_io iterations
+IO_BUSY_CNT=0                           # iterations in which the process used a device flagged as busy
+
+##############################threshold##############################
+CPU_RATIO_LOW_LVL=50 #20
+CPU_RATIO_LOW_SYS_LVL=$((CPU_RATIO_LOW_LVL/3))  # sys% limit applied when total CPU usage is low
+CPU_RATIO_HIGH_SYS_LVL=20               # sys% limit applied when total CPU usage is not low
+PERF_LAST_TIME=3                        # seconds each "perf record ... sleep N" capture lasts
+
+THREAD_CSWCH_HIGH_LVL=100               # voluntary context switches per second
+THREAD_NVCSWCH_HIGH_LVL=200             # involuntary context switches per second
+
+PROCESS_MINOR_PG_FAULT_LVL=50           # minor page faults per second
+PROCESS_MAJOR_PG_FAULT_LVL=0            # major page faults per second
+
+IO_QUEUE_SIZE_LVL=20.00
+
+IO_AWAIT_LVL=3.00                       # ms, compared against await / r_await / w_await
+IO_SVCTM_LVL=1.00                       # ms, compared against svctm
+IO_AWAIT_SVC_MUL_LVL=5                  # limit for the await/svctm ratio
+##############################threshold############################## + +function usage() { + echo "sysak appscan: Scan process for performance bottlenecks" + echo "options: -h, help information" + echo " -p pid, process id to be monitored" + echo " -t seconds, scan duration(unit is second).If not specified, stop when a bottleneck is detected" +} + +function add_cpu_sched_arr_cnt() { + local arr_num=${#THEAD_ID_ARR[@]} + local i=0 + local flag=0 + + while [ $i -lt $arr_num ]; do + if [ ${THEAD_ID_ARR[$i]} -eq $1 ]; then + if [ $2 -eq 1 ]; then + let CPU_LOW_CNT_ARR[$i]=CPU_LOW_CNT_ARR[$i]+1 + elif [ $2 -eq 2 ]; then + let CPU_SYS_HIGH_CNT_ARR[$i]=CPU_SYS_HIGH_CNT_ARR[$i]+1 + elif [ $2 -eq 3 ]; then + let CSWCH_HIGH_ARR[$i]=CSWCH_HIGH_ARR[$i]+1 + elif [ $2 -eq 4 ]; then + let NVCSWCH_HIGH_ARR[$i]=NVCSWCH_HIGH_ARR[$i]+1 + elif [ $2 -eq 5 ]; then + let CSWCH_SUM_ARR[$i]=CSWCH_SUM_ARR[$i]+1 + elif [ $2 -eq 6 ]; then + let CPU_SCAN_SUM_ARR[$i]=CPU_SCAN_SUM_ARR[$i]+1 + else + echo "function parameter error">>${LOGFILE} + fi + + flag=1 + break + fi + + let i=i+1 + done + + if [ $flag -eq 0 ]; then + THEAD_ID_ARR[arr_num]=$1 + + if [ $2 -eq 1 ]; then + ###cpu low### + CPU_LOW_CNT_ARR[arr_num]=1 + CPU_SYS_HIGH_CNT_ARR[arr_num]=0 + CSWCH_HIGH_ARR[arr_num]=0 + NVCSWCH_HIGH_ARR[arr_num]=0 + CSWCH_SUM_ARR[arr_num]=0 + CPU_SCAN_SUM_ARR[arr_num]=0 + elif [ $2 -eq 2 ]; then + ###cpu sys high### + CPU_LOW_CNT_ARR[arr_num]=0 + CPU_SYS_HIGH_CNT_ARR[arr_num]=1 + CSWCH_HIGH_ARR[arr_num]=0 + NVCSWCH_HIGH_ARR[arr_num]=0 + CSWCH_SUM_ARR[arr_num]=0 + CPU_SCAN_SUM_ARR[arr_num]=0 + elif [ $2 -eq 3 ]; then + ###voluntary context switch### + CPU_LOW_CNT_ARR[arr_num]=0 + CPU_SYS_HIGH_CNT_ARR[arr_num]=0 + CSWCH_HIGH_ARR[arr_num]=1 + NVCSWCH_HIGH_ARR[arr_num]=0 + CSWCH_SUM_ARR[arr_num]=0 + CPU_SCAN_SUM_ARR[arr_num]=0 + elif [ $2 -eq 4 ]; then + ###involuntary context switch### + CPU_LOW_CNT_ARR[arr_num]=0 + CPU_SYS_HIGH_CNT_ARR[arr_num]=0 + CSWCH_HIGH_ARR[arr_num]=0 + NVCSWCH_HIGH_ARR[arr_num]=1 
+ CSWCH_SUM_ARR[arr_num]=0 + CPU_SCAN_SUM_ARR[arr_num]=0 + elif [ $2 -eq 5 ]; then + ###Number of scan thread context switches### + CPU_LOW_CNT_ARR[arr_num]=0 + CPU_SYS_HIGH_CNT_ARR[arr_num]=0 + CSWCH_HIGH_ARR[arr_num]=0 + NVCSWCH_HIGH_ARR[arr_num]=0 + CSWCH_SUM_ARR[arr_num]=1 + CPU_SCAN_SUM_ARR[arr_num]=0 + elif [ $2 -eq 6 ]; then + ###Number of scan cpu### + CPU_LOW_CNT_ARR[arr_num]=0 + CPU_SYS_HIGH_CNT_ARR[arr_num]=0 + CSWCH_HIGH_ARR[arr_num]=0 + NVCSWCH_HIGH_ARR[arr_num]=0 + CSWCH_SUM_ARR[arr_num]=0 + CPU_SCAN_SUM_ARR[arr_num]=1 + else + echo "function parameter error">>${LOGFILE} + fi + fi + +:</dev/null 2>&1` +} + +function scan_cpu() { + trap 'on_cpu_abrt' SIGABRT + trap "" SIGINT + + while : ;do + local cmd_res=`pidstat -t -u -w -p $pid 1 1 2>/dev/null | grep -E "Average|平均时间" | grep -v "UID"` + + local row_num=`echo "$cmd_res" | wc -l` + row_num=$((row_num/2)) + local cpu_ratio=(`echo "$cmd_res" | head -n $row_num | awk '{print $4,$5,$6,$8}' | sed "s/\.[0-9]*//g"`) + + local ctx_swch=(`echo "$cmd_res" | tail -n $row_num | awk '{print $4,$5,$6}' | sed "s/\.[0-9]*//g"`) + local cpu_ratio_num=${#cpu_ratio[@]} + local bg_pid=-1 + + local i=0 + local j=0 + + while [ $i -lt $cpu_ratio_num ]; do + tid=${cpu_ratio[$i]} + usr=${cpu_ratio[$i+1]} + sys=${cpu_ratio[$i+2]} + sum=${cpu_ratio[$i+3]} + + if [ $i -eq 0 ]; then + let i=i+4 + let j=j+3 + continue + else + perf_cmd="perf record -g --tid=$tid" + task_name="thread $tid" + fi + + add_cpu_scan_sum_cnt $tid + + if [ $sum -lt $CPU_RATIO_LOW_LVL ]; then + #CPU usage too low + add_cpu_low_cnt $tid + + echo "$task_name cpu usage $sum is lower than $CPU_RATIO_LOW_LVL" >>${LOGFILE} + + perf_cmd="${perf_cmd} -o ${PERFDIR}/perf.tid_${tid}-cpu_usage_low_$sum.`date "+%Y-%m-%d-%H-%M-%S"`.data sleep ${PERF_LAST_TIME}" + + if [ $sys -gt $CPU_RATIO_LOW_SYS_LVL ]; then + ############sys too high,perf the thread########################### + echo "$task_name cpu sys usage $sys is higher than $CPU_RATIO_LOW_SYS_LVL" >>${LOGFILE} + 
else + ############usr triggers schedule,like mutex and so on.############ + echo "Maybe $task_name triggers schedule too frequently, like Mutex granularity is too large and so on." >>${LOGFILE} + fi + + echo "$perf_cmd" >>${LOGFILE} + #`$perf_cmd >/dev/null 2>&1` & + perf_fun "$perf_cmd" & + bg_pid=$! + else + #CPU usage high + + if [ $sys -gt $CPU_RATIO_HIGH_SYS_LVL ]; then + ########################CPU sys usage high#################### + add_cpu_sys_high_cnt $tid + echo "$task_name cpu sys usage $sys is higher than $CPU_RATIO_HIGH_SYS_LVL" >>${LOGFILE} + + perf_cmd="${perf_cmd} -o ${PERFDIR}/perf.tid_${tid}-cpu_sys_high_$sys.`date "+%Y-%m-%d-%H-%M-%S"`.data sleep ${PERF_LAST_TIME}" + + echo "$perf_cmd" >>${LOGFILE} + perf_fun "$perf_cmd" & + bg_pid=$! + else + ########################CPU usr usage high################### + cswch=${ctx_swch[$j+1]} + nvcswch=${ctx_swch[$j+2]} + + ############################check context switch frequency################################## + add_cswch_sum_cnt $tid + + if [ $cswch -gt $THREAD_CSWCH_HIGH_LVL ]; then + add_cswch_high_cnt $tid + echo "thread $tid voluntary context switch frequency $cswch greater than $THREAD_CSWCH_HIGH_LVL/s" >>${LOGFILE} + fi + + if [ $nvcswch -gt $THREAD_NVCSWCH_HIGH_LVL ]; then + add_nvcswch_high_cnt $tid + echo "thread $tid involuntary context switch frequency $nvcswch greater than $THREAD_NVCSWCH_HIGH_LVL/s" >>${LOGFILE} + fi + + ############################check if bind cpu core or not################################### + cpu_on_line=`lscpu | grep On-line | awk '{print $4}'` + cpu_allowed=`cat /proc/$tid/status | grep "Cpus_allowed_list" | awk '{print $2}'` + + if [ "$cpu_allowed" != "$cpu_on_line" ]; then + echo "thread $tid bind cpu $cpu_allowed" >>${LOGFILE} + fi + fi + fi + + let i=i+4 + let j=j+3 + done + + if [ $bg_pid -gt 0 ]; then + wait $bg_pid + fi + + done +} + +function scan_mem() { + local low_wmark_hit=-1 + local high_wmark_hit=-1 + + trap 'on_mem_abrt' SIGABRT + trap "" SIGINT + + 
while : ;do + ##########################check process page fault############################ + local pg_fault=(`pidstat -r -p $pid 2 1 2>/dev/null | tail -n 1 | awk '{print $4,$5}' | sed "s/\.[0-9]*//g"`) + + local minor_fault=${pg_fault[0]} + local major_fault=${pg_fault[1]} + + let SCAN_MEM_SUM=SCAN_MEM_SUM+1 + + if [ $minor_fault -gt $PROCESS_MINOR_PG_FAULT_LVL ]; then + echo "process $pid minor page fault $minor_fault is greater than $PROCESS_MINOR_PG_FAULT_LVL" >>${LOGFILE} + let MEM_MINOR_PF_CNT=MEM_MINOR_PF_CNT+1 + fi + + if [ $major_fault -gt $PROCESS_MAJOR_PG_FAULT_LVL ]; then + echo "process $pid major page fault $major_fault is greater than $PROCESS_MAJOR_PG_FAULT_LVL" >>${LOGFILE} + let MEM_MAJOR_PF_CNT=MEM_MAJOR_PF_CNT+1 + fi + + #############################check watermark################################### + local node_info=(`cat /proc/zoneinfo | grep -A 3 "pages free" | tr -cd "[0-9]\n"`) + local node_name=`cat /proc/zoneinfo | grep Node` + + local i=0 + local j=1 + local node_info_num=${#node_info[@]} + + while [ $i -lt $node_info_num ]; do + local free=${node_info[$i]} + local min=${node_info[$i+1]} + local low=${node_info[$i+2]} + local high=${node_info[$i+3]} + + if [ $free -lt $high ]; then + echo "`echo "$node_name" | sed -n ${j}'p'` free memory $free is lower than watermark $high pages" >>${LOGFILE} + fi + + let i=i+4 + let j=j+1 + done + + ############################check watermark hit################################ + local wmark_hit=(`cat /proc/vmstat | grep "kswapd_.*_wmark_hit_quickly" | awk '{print $2}'`) + local tmp_low_wmark_hit=${wmark_hit[0]} + local tmp_high_wmark_hit=${wmark_hit[1]} + + if [ $low_wmark_hit -ne -1 ]; then + let low_diff=tmp_low_wmark_hit-low_wmark_hit + let high_diff=tmp_high_wmark_hit-high_wmark_hit + + if [ $low_diff -gt 0 ]; then + echo "The number of free pages drops below the low watermark $low_diff timers in a second." 
>>${LOGFILE} + let MEM_LOW_WM_CNT=MEM_LOW_WM_CNT+1 + fi + + if [ $high_diff -gt 0 ]; then + echo "The number of free pages drops below the high watermark $high_diff times in a second." >>${LOGFILE} + let MEM_HIGH_WM_CNT=MEM_HIGH_WM_CNT+1 + fi + fi + + low_wmark_hit=$tmp_low_wmark_hit + high_wmark_hit=$tmp_high_wmark_hit + + ###########################check NUMA######################################### + local numa_info=(`numastat -p $pid 2>/dev/null | tail -n 1 | awk '{for (i=2;i>${LOGFILE} + MEM_DIFF_NUMA_FLAG=1 + else + #Memory locate in same NUMA node.Check threads if running on the NUMA node which memory locates or not. + local node_cpus=(`numactl --hardware 2>/dev/null | grep "node $node_id cpus" | awk -F":" '{print $2}'`) + + local running_threads=(`pidstat -t -p $pid | grep -v "CPU" | awk '{print $5,$10}'`) + local running_threads_num=${#running_threads[@]} + + i=2 + while [ $i -lt $running_threads_num ]; do + local thread_id=${running_threads[$i]} + local thread_cpu=${running_threads[$i+1]} + local flag=0 + + for j in ${node_cpus[@]}; do + if [ $thread_cpu -eq $j ]; then + flag=1 + break + fi + done + + if [ $flag -eq 0 ]; then + echo "Thread $thread_id running not in NUMA node $node_id" >>${LOGFILE} + MEM_THREAD_RUN_DIFF_NUMA_FLAG=1 + fi + + let i=i+2 + done + fi + done +} + +function scan_io() { + trap 'on_io_abrt' SIGABRT + trap "" SIGINT + + while :; do + if [ ! 
-d "/proc/sysak/appscan" ];then + echo "directory /proc/sysak/appscan not exists" >> ${LOGFILE} + sleep 5 + else + break + fi + done + + echo 1 > /proc/sysak/appscan/enable + + while :; do + #local io_info=(`iostat -y -x 1 1 | grep -A 1000 "Device" | grep -v "Device" | awk '{print $1,$9,$10,$11,$12,$13,$14}' | sed "s/\.[0-9]*//g"`) + echo $pid > /proc/sysak/appscan/pid + local io_info=(`iostat -y -x 1 1 | grep -A 1000 "Device" | grep -v "Device" | awk '{print $1,$6,$7,$8,$9,$10,$11,$12,$13,$14}'`) + + local pid_devs=(`cat /proc/sysak/appscan/dev`) + + echo -1 > /proc/sysak/appscan/pid + + let SCAN_IO_SUM=SCAN_IO_SUM+1 + +:< $IO_AWAIT_LVL"|bc` -eq 1 ]; then + echo -e "${alert_info}await is greater than ${IO_AWAIT_LVL}ms" >>${LOGFILE} + alert_info="" + flag=1 + elif [ `echo "$rawait > $IO_AWAIT_LVL"|bc` -eq 1 ]; then + echo -e "${alert_info}rawait is greater than ${IO_AWAIT_LVL}ms" >>${LOGFILE} + alert_info="" + flag=1 + elif [ `echo "$wawait > $IO_AWAIT_LVL"|bc` -eq 1 ]; then + echo -e "${alert_info}wawait is greater than ${IO_AWAIT_LVL}ms" >>${LOGFILE} + alert_info="" + flag=1 + fi + + if [ `echo "$svctm > $IO_SVCTM_LVL"|bc` -eq 1 ]; then + echo -e "${alert_info}svctm is greater than ${IO_SVCTM_LVL}ms" >>${LOGFILE} + flag=1 + fi + + if [ $flag -eq 0 ] && [ `echo "$svctm > 0"|bc` -eq 1 ]; then + if [ `echo "${await}/${svctm}"|bc` -gt $IO_AWAIT_SVC_MUL_LVL ]; then + echo -e "${alert_info}await/svctm is greater than $IO_AWAIT_SVC_MUL_LVL" >>${LOGFILE} + alert_info="" + flag=1 + elif [ `echo "${rawait}/${svctm}"|bc` -gt $IO_AWAIT_SVC_MUL_LVL ]; then + echo -e "${alert_info}r_await/svctm is greater than $IO_AWAIT_SVC_MUL_LVL" >>${LOGFILE} + alert_info="" + flag=1 + elif [ `echo "${wawait}/${svctm}"|bc` -gt $IO_AWAIT_SVC_MUL_LVL ]; then + echo -e "${alert_info}w_await/svctm is greater than $IO_AWAIT_SVC_MUL_LVL" >>${LOGFILE} + alert_info="" + flag=1 + fi + fi + + if [ $flag -eq 1 ]; then + io_devs+=($dev) + fi + + let i=i+10 + done + + flag=0 + for i in ${io_devs[@]}; do 
+ for j in ${pid_devs[@]}; do + if [ "$i" = "$j" ]; then + flag=1 + echo "Process $pid has IO from/to device ${i} which is very busy!" >>${LOGFILE} + fi + done + done + + if [ $flag -eq 1 ]; then + let IO_BUSY_CNT=IO_BUSY_CNT+1 + fi + + done +} + +function scan_net() { + echo "scan net" +} + +function write_scan_cpu_res() { + local i=0 + local flag=0 + local arr_num=0 + + printf "#########################CPU & Schedule#########################\n" >> $CPU_RESFILE + + i=0 + arr_num=${#CPU_LOW_CNT_ARR[*]} + + while [ $i -lt $arr_num ]; do + if [ ${CPU_LOW_CNT_ARR[$i]} -gt 0 ]; then + if [ $flag -eq 0 ]; then + printf "CPU usage lower than ${CPU_RATIO_LOW_LVL}%% are threads:\n" >> $CPU_RESFILE + printf "%-15s%-25s%-35s\n" "Thread id" "Number of occurrences" "Total number of scanning" >> $CPU_RESFILE + fi + + flag=1 + printf "%-23d%-25d%-35d\n" ${THEAD_ID_ARR[$i]} ${CPU_LOW_CNT_ARR[$i]} ${CPU_SCAN_SUM_ARR[$i]} >> $CPU_RESFILE + fi + + let i=i+1 + done + + i=0 + flag=0 + arr_num=${#CPU_SYS_HIGH_CNT_ARR[*]} + + while [ $i -lt $arr_num ]; do + if [ ${CPU_SYS_HIGH_CNT_ARR[$i]} -gt 0 ]; then + if [ $flag -eq 0 ]; then + printf "\nCPU sys usage higher than ${CPU_RATIO_HIGH_SYS_LVL}%% are threads:\n" >> $CPU_RESFILE + printf "%-15s%-25s%-35s\n" "Thread id" "Number of occurrences" "Total number of scanning" >> $CPU_RESFILE + fi + + flag=1 + printf "%-23d%-25d%-35d\n" ${THEAD_ID_ARR[$i]} ${CPU_SYS_HIGH_CNT_ARR[$i]} ${CPU_SCAN_SUM_ARR[$i]} >> $CPU_RESFILE + fi + + let i=i+1 + done + + i=0 + flag=0 + arr_num=${#CSWCH_HIGH_ARR[*]} + + while [ $i -lt $arr_num ]; do + if [ ${CSWCH_HIGH_ARR[$i]} -gt 0 ]; then + if [ $flag -eq 0 ]; then + printf "\nWhen cpu usage is normal, but voluntary context switch frequency higher than ${THREAD_CSWCH_HIGH_LVL} counts per second are threads:\n" >> $CPU_RESFILE + printf "%-15s%-25s%-35s\n" "Thread id" "Number of occurrences" "Total number of scanning" >> $CPU_RESFILE + fi + + flag=1 + printf "%-23d%-25d%-35d\n" ${THEAD_ID_ARR[$i]} 
${CSWCH_HIGH_ARR[$i]} ${CSWCH_SUM_ARR[$i]} >> $CPU_RESFILE + fi + + let i=i+1 + done + + i=0 + flag=0 + arr_num=${#NVCSWCH_HIGH_ARR[*]} + + while [ $i -lt $arr_num ]; do + if [ ${NVCSWCH_HIGH_ARR[$i]} -gt 0 ]; then + if [ $flag -eq 0 ]; then + printf "\nWhen cpu usage is normal, but involuntary context switch frequency higher than ${THREAD_NVCSWCH_HIGH_LVL} counts per second are threads:\n" >> $CPU_RESFILE + printf "%-15s%-25s%-35s\n" "Thread id" "Number of occurrences" "Total number of scanning" >> $CPU_RESFILE + fi + + flag=1 + printf "%-23d%-25d%-35d\n" ${THEAD_ID_ARR[$i]} ${NVCSWCH_HIGH_ARR[$i]} ${CSWCH_SUM_ARR[$i]} >> $CPU_RESFILE + fi + + let i=i+1 + done +} + +function write_scan_mem_res() { + printf "#############################Memory#############################\n" >> $MEM_RESFILE + + if [ $MEM_MINOR_PF_CNT -gt 0 ]; then + printf "Process minor page fault is greater than $PROCESS_MINOR_PG_FAULT_LVL per second:\n" >> $MEM_RESFILE + printf "%-25s%-35s\n" "Number of occurrences" "Total number of scanning" >> $MEM_RESFILE + printf "%-35d%-35d\n" ${MEM_MINOR_PF_CNT} ${SCAN_MEM_SUM} >> $MEM_RESFILE + fi + + if [ $MEM_MAJOR_PF_CNT -gt 0 ]; then + printf "Process major page fault is greater than $PROCESS_MAJOR_PG_FAULT_LVL per second:\n" >> $MEM_RESFILE + printf "%-25s%-35s\n" "Number of occurrences" "Total number of scanning" >> $MEM_RESFILE + printf "%-35d%-35d\n" ${MEM_MAJOR_PF_CNT} ${SCAN_MEM_SUM} >> $MEM_RESFILE + fi + + if [ $MEM_LOW_WM_CNT -gt 0 ]; then + printf "The number of free pages drops below the low watermark in a second:\n" >> $MEM_RESFILE + printf "%-25s%-35s\n" "Number of occurrences" "Total number of scanning" >> $MEM_RESFILE + printf "%-35d%-35d\n" ${MEM_LOW_WM_CNT} ${SCAN_MEM_SUM} >> $MEM_RESFILE + fi + + if [ $MEM_HIGH_WM_CNT -gt 0 ]; then + printf "The number of free pages drops below the high watermark in a second:\n" >> $MEM_RESFILE + printf "%-25s%-35s\n" "Number of occurrences" "Total number of scanning" >> $MEM_RESFILE + printf 
"%-35d%-35d\n" ${MEM_HIGH_WM_CNT} ${SCAN_MEM_SUM} >> $MEM_RESFILE + fi + + if [ $MEM_DIFF_NUMA_FLAG -eq 1 ]; then + printf "\nProcess memory locate different NUMA node.\n" >> $MEM_RESFILE + fi + + if [ $MEM_THREAD_RUN_DIFF_NUMA_FLAG -eq 1 ]; then + printf "\nThread of process run different NUMA node.\n" >> $MEM_RESFILE + fi +} + +function write_scan_io_res() { + printf "###############################IO###############################\n" >> $IO_RESFILE + + if [ $IO_BUSY_CNT -gt 0 ]; then + printf "Process has IO from/to device(s) which is(are) very busy:\n" >> $IO_RESFILE + printf "%-25s%-35s\n" "Number of occurrences" "Total number of scanning" >> $IO_RESFILE + printf "%-35d%-35d\n" ${IO_BUSY_CNT} ${SCAN_IO_SUM} >> $IO_RESFILE + fi +} + +function on_cpu_abrt() { + write_scan_cpu_res + exit +} + +function on_mem_abrt() { + write_scan_mem_res + exit +} + +function on_io_abrt() { + echo 0 > /proc/sysak/appscan/enable + write_scan_io_res + exit +} + +function on_ctrl_c() { + kill_scan_process + #wait + show_scan_res + exit +} + +function show_scan_res() { + cat ${CPU_RESFILE} + echo "" + cat ${MEM_RESFILE} + echo "" + cat ${IO_RESFILE} +} + +function check_opts() { + if [ $pid -eq -1 ]; then + usage + exit -1 + fi +} + +function mk_log_dir() { + if [ ! -d "$LOGDIR" ];then + mkdir -p $LOGDIR + fi + + LOGDIR="${LOGDIR}/pid_$pid.${CURDATE}" + PERFDIR="${LOGDIR}/perf" + LOGFILE="${LOGDIR}/log" + CPU_RESFILE="${LOGDIR}/${CPU_RESFILE}" + MEM_RESFILE="${LOGDIR}/${MEM_RESFILE}" + IO_RESFILE="${LOGDIR}/${IO_RESFILE}" + + if [ ! -d "$LOGDIR" ];then + echo "Create directory $LOGDIR for saving log and perf data." 
+ mkdir $LOGDIR + mkdir $PERFDIR + else + echo "directory $LOGDIR already exists" + exit -1 + fi +} + +function kill_scan_process() { + kill -s 6 $SCAN_CPU_PID + kill -s 6 $SCAN_MEM_PID + kill -s 6 $SCAN_IO_PID +# kill $SCAN_NET_PID + wait +} + +while getopts 'p:t:lh' OPT; do + case $OPT in + "h") + usage + exit 0 + ;; + "p") + pid=$OPTARG + ;; + "t") + SCAN_TIME=$OPTARG + ;; + *) + usage + exit -1 + ;; + esac +done + +check_opts +mk_log_dir + +scan_cpu & +SCAN_CPU_PID=$! + +scan_mem & +SCAN_MEM_PID=$! + +scan_io & +SCAN_IO_PID=$! + +#scan_net & +#SCAN_NET_PID=$! + +#echo "cpu_pid:${SCAN_CPU_PID} mem_pid:${SCAN_MEM_PID} io_pid:${SCAN_IO_PID}" + +trap 'on_ctrl_c' INT + +if [ $SCAN_TIME -ne -1 ]; then + sleep $SCAN_TIME + kill_scan_process + show_scan_res +else + wait +fi + diff --git a/source/tools/detect/loadtask/loadtask.sh b/source/tools/detect/loadtask/loadtask.sh index 6c2e3189ec5bdb54b5402a6d8b11ef61b76da154..bd65e196ed9ed092eb07bcc5be6670abfa56abd1 100755 --- a/source/tools/detect/loadtask/loadtask.sh +++ b/source/tools/detect/loadtask/loadtask.sh @@ -7,78 +7,186 @@ # Modify Date: 2021-02-06 10:53 # Function: #***************************************************************# + usage() { echo "sysak loadtask: show all tasks of load contribution" - echo "options: -h, help information" - echo " -m maxload, only show tasks when load reach maxload " + echo "options: -h, help information" + echo " -m maxload, only show tasks when load reach maxload " echo " -f datafile, file for output" echo " -i interval, the interval checking the load" - echo " -d, run as deamon" - echo " -s, show summary result" + echo " -d, keep monitoring even if greater than maxload occurs.useful only if the -m option is set" + echo " -s, show summary result" + echo " -k, terminate running ${selftaskname} which started previously" + echo " -r datafile, read datafile created by '-f datafile' or by default(datafile directory /var/log/sysak/loadtask/) and show result" } uninterrupt_cnt=0 
running_cnt=0 +container="" + +get_container() { + if [ -f $TOOLS_ROOT/tcontainer ]; then + container="`$TOOLS_ROOT/tcontainer -p $1`" + else + container="" + fi +} uninterrupt_dump() { - echo "uninterrupt tasks:" >> $datafile - for pid in $(ls /proc/); - do - if [ "$pid" -gt 0 ] 2>/dev/null; then - for tid in $(ls /proc/$pid/task/ 2>/dev/null); do - if [ "$tid" -gt 0 ] 2>/dev/null; then - run=`cat /proc/$tid/status | grep "disk sleep" | wc -l` - if [ "$run" -gt 0 ] 2>/dev/null; then - echo $tid >> $datafile - cat /proc/$tid/status | grep Name >> $datafile - cat /proc/$tid/stack >> $datafile - uninterrupt_cnt=$(($uninterrupt_cnt+1)) + local flag=0 + echo "uninterrupt tasks:" >> $tmpfile + + if [ -f $TOOLS_ROOT/taskstate ]; then + cat $dtaskfile >> $tmpfile + else + for pid in $(ls /proc/); + do + flag=0 + if [ "$pid" -gt 0 ] 2>/dev/null; then + for tid in $(ls /proc/$pid/task/ 2>/dev/null); do + if [ "$tid" -gt 0 ] 2>/dev/null; then + run=`cat /proc/$tid/status | grep "disk sleep" | wc -l` + if [ "$run" -gt 0 ] 2>/dev/null; then + echo $tid >> $tmpfile + if [ $flag -eq 0 ]; then + get_container $pid + flag=1 + fi + + echo "`cat /proc/$tid/status | grep Name` $container" >> $tmpfile + cat /proc/$tid/stack >> $tmpfile + uninterrupt_cnt=$(($uninterrupt_cnt+1)) + fi; fi; - fi; - done; - fi; - done + done; + fi; + done + fi } running_dump() { - echo "running tasks:" >> $datafile - for pid in $(ls /proc/); - do - if [ "$pid" -gt 0 ] 2>/dev/null; then - for tid in $(ls /proc/$pid/task/ 2>/dev/null); do - if [ "$tid" -gt 0 ] 2>/dev/null; then - run=`cat /proc/$tid/status | grep "running" | wc -l` - if [ "$run" -gt 0 ] 2>/dev/null; then - echo $tid >> $datafile - cat /proc/$tid/status | grep Name >> $datafile - running_cnt=$(($running_cnt+1)) + local flag=0 + echo "running tasks:" >> $tmpfile + + if [ -f $TOOLS_ROOT/taskstate ]; then + cat $rtaskfile >> $tmpfile + else + for pid in $(ls /proc/); + do + flag=0 + if [ "$pid" -gt 0 ] 2>/dev/null; then + for tid in $(ls 
/proc/$pid/task/ 2>/dev/null); do + if [ "$tid" -gt 0 ] 2>/dev/null; then + run=`cat /proc/$tid/status | grep "running" | wc -l` + if [ "$run" -gt 0 ] 2>/dev/null; then + echo $tid >> $tmpfile + if [ $flag -eq 0 ]; then + get_container $pid + flag=1 + fi + + echo "`cat /proc/$tid/status | grep Name` $container" >> $tmpfile + running_cnt=$(($running_cnt+1)) + fi; fi; - fi; - done; - fi; - done + done; + fi; + done + fi +} + +cal_sirq() { + if [ $1 -eq 1 ]; then + #exist softirq tool + if [ $2 -eq 0 ]; then + $TOOLS_ROOT/softirq -s $tmpsirqfile + else + $TOOLS_ROOT/softirq -s $tmpsirqfile -r $sirqspeedfile + fi + else + local sirq=`cat /proc/softirqs` + local cpu_num=`echo "$sirq" | head -n 1 | awk '{print NF}'` + local i=1 + local arr_num=0 + + while [ $i -le $sirq_num ]; do + local j=0 + local tmp=$((i+1)) + local sirq_row_data=`echo "$sirq" | head -n $tmp | tail -n 1` + local sum=`echo $sirq_row_data | awk 'BEGIN{sum=0}{for(i=2; i<=NF; i++) sum+=$i} END{print sum}'` + + if [ $2 -eq 0 ]; then + sirq_before[arr_num]=`echo $sirq_row_data | awk '{print $1}'` + arr_num=$((arr_num+1)) + sirq_before[arr_num]=$sum + else + sirq_after[arr_num]=`echo $sirq_row_data | awk '{print $1}'` + arr_num=$((arr_num+1)) + sirq_after[arr_num]=$sum + fi + arr_num=$((arr_num+1)) + i=$((i+1)) + done + fi +} + +cal_sirq_speed() { + echo "softirq speed:" >> $tmpfile + + if [ $1 -eq 1 ]; then + #exist softirq tool + cat $sirqspeedfile >> $tmpfile + else + local arr_num=${#sirq_before[@]} + local i=0 + + while [ $i -lt $arr_num ]; do + local diff=$((${sirq_after[$i+1]}-${sirq_before[$i+1]})) + echo " ${sirq_after[$i]} ${diff} count/s" >> $tmpfile + i=$((i+2)) + done + fi } #sort by name show_result() { - cat $datafile | grep "load reson" - cat $datafile | grep "caused by" + cat $1 | grep "load reason" + cat $1 | grep "caused by" echo "top load tasks:" - cat $datafile | grep Name | awk '{print $2}' | uniq -c | sort -nr + cat $1 | grep Name | awk '{print $2 " " $3}' | uniq -c | sort -nr + cat 
$1 | grep -A $sirq_num "softirq speed:" + echo "" } current_analyse() { - date > $datafile - cat /proc/loadavg >> $datafile + local high_sirq=0 + local exist_sirq_tool=0 + + if [ -f $TOOLS_ROOT/softirq ]; then + exist_sirq_tool=1 + fi + + echo "####################################################################################" > $tmpfile + date >> $tmpfile + cat /proc/loadavg >> $tmpfile load=`cat /proc/loadavg | awk '{print $1}'` - cpu_util=(`mpstat $cpuarg 1 1 | grep Average | awk '{print $3" "$5" "$6" "$7" "$8" "$12}'`) - echo "cpu: $cpu_util" >> $datafile - if [ $(echo "$load < 5" | bc) -eq 1 ] ;then - echo "load reson: not high" >> $datafile + + cal_sirq $exist_sirq_tool 0 + cpu_util=(`mpstat $cpuarg 1 1 | awk 'END {print $3" "$5" "$6" "$7" "$8" "$12}'`) + + cal_sirq $exist_sirq_tool 1 + + echo "cpu: ${cpu_util[@]}" >> $tmpfile + if [ $(echo "$load < 1" | bc) -eq 1 ] ;then + echo "load reason: not high" >> $tmpfile return fi + if [ -f $TOOLS_ROOT/taskstate ]; then + $TOOLS_ROOT/taskstate -r $rtaskfile -d $dtaskfile + fi + running_dump uninterrupt_dump usr_util=${cpu_util[0]} @@ -93,39 +201,52 @@ current_analyse() { if [ $(echo "$sys_util > ((100-$cpu_idle)*0.2)" | bc) -eq 1 ]; then high_cost+="sys " extra_cmd="[sysmonitor]" - extra_info="high memory or kernel competition" + extra_info="high memory or kernel competition " + if [ -f $TOOLS_ROOT/cpu_flamegraph ]; then + $TOOLS_ROOT/cpu_flamegraph -d 5 + fi fi if [ $(echo "$irq_util > ((100-$cpu_idle)*0.05)" | bc) -eq 1 ]; then - high_cost="irq " + high_cost+="irq " extra_cmd+="[cpuirq]" + if [ -f $TOOLS_ROOT/cpuirq ]; then + $TOOLS_ROOT/cpuirq + fi fi if [ $(echo "$soft_util > ((100-$cpu_idle)*0.05)" | bc) -eq 1 ]; then - high_cost="softirq " - extra_info+="high network competition" + high_cost+="softirq " + extra_info+="high network competition " + high_sirq=1 fi if [ $(echo "$io_wait > ((100-$cpu_idle)*0.1)" | bc) -eq 1 ]; then high_wait="io " extra_cmd+="[iolantency]" fi - mutex_cnt=`cat $datafile | grep 
mutex | wc -l` + mutex_cnt=`cat $tmpfile | grep mutex | wc -l` if [ $mutex_cnt -gt 5 ]; then high_wait+="mutex " #extra_cmd+="[lockcheck]" fi if [ $(echo "$load*0.2 > $uninterrupt_cnt" | bc) -eq 1 ]; then - echo "load reason: high $high_cost cpu cost" >> $datafile + echo "load reason: high $high_cost cpu cost" >> $tmpfile else if [ $(echo "$load*0.6 < $uninterrupt_cnt" | bc) -eq 1 ]; then - echo "load reason: high $high_wait wait" >> $datafile + echo "load reason: high $high_wait wait" >> $tmpfile else - echo "load reason: mixed press by high $high_cost and $high_wait wait" >> $datafile + echo "load reason: mixed press by high $high_cost and $high_wait wait" >> $tmpfile fi fi + if [ $high_sirq -eq 1 ]; then + cal_sirq_speed $exist_sirq_tool + fi + if [ -n "$extra_info" ]; then - echo this may caused by $extra_info, you can contact kernel support of use more sysak$extra_cmd tools >> $datafile + echo this may caused by $extra_info, you can contact kernel support of use more sysak$extra_cmd tools >> $tmpfile fi + + echo >> $tmpfile } history_analyse() { @@ -140,8 +261,10 @@ load_analyse() { fi if [ "$summary" == "true" ];then - show_result + show_result $tmpfile fi + + cat ${tmpfile} >> ${datafile} } monitor() { @@ -150,6 +273,13 @@ monitor() { load=`cat /proc/loadavg | awk '{print $1}' | awk -F. '{print $1}'` if [ $load -gt $max_load ]; then current_analyse + + if [ "$summary" == "true" ];then + show_result $tmpfile + fi + + cat ${tmpfile} >> ${datafile} + if [ "$deamon" != "true" ];then exit fi @@ -159,10 +289,71 @@ monitor() { done } +mk_log_dir() { + if [ ! 
-d "$loadtask_dir" ];then + mkdir -p $loadtask_dir + fi +} + +kill_old_loadtask() { + if [ -f "$pidfile" ]; then + local oldpid=`cat $pidfile` + local oldtaskname="`cat /proc/$oldpid/status 2>/dev/null | grep -w "Name" | awk -F" " '{print $2}'`" + + if [ "$oldtaskname" == "$selftaskname" ]; then + kill -9 $oldpid + fi + + rm -f $pidfile + fi +} + +create_pidfile() { + echo $$ > $pidfile +} + +parse_datafile() { + if [ -f "$parsed_datafile" ]; then + local line_arr=(`grep -n "#####" ${parsed_datafile} | awk -F":" '{print $1}'`) + local cnt=${#line_arr[@]} + local i=0 + + while [ $i -lt $cnt ]; do + local start=${line_arr[$i]} + + if [ $i -eq $(($cnt-1)) ]; then + sed -n ${start}',$p' ${parsed_datafile} > $tmp_parsed_datafile + else + local end=$((${line_arr[$i+1]}-1)) + sed -n ${start}','${end}'p' ${parsed_datafile} > $tmp_parsed_datafile + fi + + head -n 2 $tmp_parsed_datafile + show_result $tmp_parsed_datafile + + let i=i+1 + done + fi +} + interval=5 -datafile=/var/log/sysak/loadtask.log +loadtask_dir=/var/log/sysak/loadtask/ +datafile=${loadtask_dir}loadtask-`date "+%Y-%m-%d-%H-%M-%S"`.log +tmpfile=${loadtask_dir}.tmplog +rtaskfile=${loadtask_dir}runtask +dtaskfile=${loadtask_dir}dtask +tmpsirqfile=${loadtask_dir}tmpsoftirq +sirqspeedfile=${loadtask_dir}softirqspeed +pidfile=${loadtask_dir}.pidfile max_load=0 -while getopts 'm:f:i:t:dsh' OPT; do +sirq_num=$((`cat /proc/softirqs | wc -l`-1)) +sirq_before=() +sirq_after=() +selftaskname="`cat /proc/$$/status | grep -w "Name" | awk -F" " '{print $2}'`" +parsed_datafile="" +tmp_parsed_datafile=${loadtask_dir}.parsedlog + +while getopts 'm:f:i:t:r:dskh' OPT; do case $OPT in "h") usage @@ -183,9 +374,19 @@ while getopts 'm:f:i:t:dsh' OPT; do "d") deamon="true" ;; + "k") + kill_old_loadtask + exit 0 + ;; "s") summary="true" ;; + "r") + parsed_datafile=$OPTARG + mk_log_dir + parse_datafile + exit 0 + ;; *) usage exit -1 @@ -193,6 +394,10 @@ while getopts 'm:f:i:t:dsh' OPT; do esac done +mk_log_dir +kill_old_loadtask 
+create_pidfile
+
 if [ $max_load -gt 0 ];then
 	monitor
 else
diff --git a/source/tools/detect/softirq/Makefile b/source/tools/detect/softirq/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..0dac025710a4fab25f8dcf2c8d1c164eb52a76e3
--- /dev/null
+++ b/source/tools/detect/softirq/Makefile
@@ -0,0 +1,5 @@
+target := softirq
+
+mods := main.o
+
+include $(SRC)/mk/csrc.mk
diff --git a/source/tools/detect/softirq/main.c b/source/tools/detect/softirq/main.c
new file mode 100644
index 0000000000000000000000000000000000000000..445570d6f8554d231582d7bcc8a0086cfae9b90a
--- /dev/null
+++ b/source/tools/detect/softirq/main.c
@@ -0,0 +1,245 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define MAX_PATH_LEN 200
+#define PROC_SOFTIRQ "/proc/softirqs"
+#define MAX_LINE_LEN 4096
+
+static char g_softirq_file[MAX_PATH_LEN] = {0};
+static char g_res_file[MAX_PATH_LEN] = {0};
+static FILE *g_softirq_fp;
+static FILE *g_res_fp;
+static bool g_calc_res = false;
+
+/* Print the help text to stdout and terminate with exit(-1). */
+static void usage(void)
+{
+	fprintf(stdout,
+	"sysak softirq: calculate softirq speed.It is used by tool loadtask.If you want use,please follow the steps below:\n"
+	" 1.run command 'softirq -s sourcefile' to output initial value to sourcefile\n"
+	" 2.sleep one second\n"
+	" 3.run command 'softirq -s sourcefile -r resultfile' to output result to resultfile\n"
+	"options: -h, help information\n"
+	" -s sourcefile, output initial value to sourcefile or get intial value from sourcefile\n"
+	" -r resultfile, output result to resultfile\n");
+	exit(-1);
+}
+
+/*
+ * Parse -s/-r/-h. -s stores the snapshot path; -r stores the result path and
+ * switches to speed-calculation mode. Exits with -1 if -s is missing or a
+ * path does not fit in MAX_PATH_LEN; prints the usage text on -h or bad input.
+ */
+static void parse_arg(int argc, char *argv[])
+{
+	int ch;
+
+	if (argc < 2)
+		usage();
+
+	while ((ch = getopt(argc, argv, "s:r:h")) != -1) {
+		switch (ch) {
+		case 's':
+			/* snprintf always terminates, so a repeated, shorter -s cannot keep a stale tail */
+			if (optarg && (strlen(optarg) < MAX_PATH_LEN))
+				snprintf(g_softirq_file, sizeof(g_softirq_file), "%s", optarg);
+			else
+				exit(-1);
+			break;
+		case 'r':
+			g_calc_res = true;
+			if (optarg && (strlen(optarg) < MAX_PATH_LEN))
+				snprintf(g_res_file, sizeof(g_res_file), "%s", optarg);
+			else
+				exit(-1);
+			break;
+		case 'h':
+		default:
+			usage();
+			break;
+		}
+	}
+
+	if (!g_softirq_file[0])
+		exit(-1);
+}
+
+/*
+ * Snapshot mode: for every line of /proc/softirqs after the first, write the
+ * first token followed by the sum of the remaining columns to g_softirq_fp.
+ * Returns 0 on success, -1 if the file cannot be opened, split or written.
+ */
+int calc_softirq(void)
+{
+	char line[MAX_LINE_LEN] = {0};
+	char *str;
+	char sum_str[30] = {0};
+	bool flag = false;
+	int ret = 0;
+	long long sum = 0;
+	FILE *fp = fopen(PROC_SOFTIRQ, "r");
+
+	if (!fp)
+		return -1;
+
+	while (fgets(line, sizeof(line), fp)) {
+		/* the first line is skipped, only the following rows are summed */
+		if (!flag) {
+			flag = true;
+			continue;
+		}
+
+		str = strtok(line, " ");
+		if (!str) {
+			ret = -1;
+			break;
+		}
+
+		if (fwrite(str, strlen(str), 1, g_softirq_fp) != 1) {
+			ret = -1;
+			break;
+		}
+
+		sum = 0;
+		while ((str = strtok(NULL, " \n"))) {
+			sum += atoll(str);
+		}
+
+		snprintf(sum_str, sizeof(sum_str), "%lld\n", sum);
+		if (fwrite(sum_str, strlen(sum_str), 1, g_softirq_fp) != 1) {
+			ret = -1;
+			break;
+		}
+	}
+
+	fclose(fp);
+	return ret;
+}
+
+/*
+ * Speed mode: re-read /proc/softirqs, sum each row, and subtract the sum
+ * stored on the snapshot line at the same position (snapshot lines are split
+ * at ':'). Writes a space, the row's first token and "<diff> counts/s" to
+ * g_res_fp. The unit is fixed text: the value is a per-second rate only when
+ * the snapshot was taken one second earlier, as the usage text prescribes.
+ * Returns 0 on success, -1 on open, parse or write failure.
+ */
+int calc_softirq_speed(void)
+{
+	char line[MAX_LINE_LEN] = {0};
+	char res_line[50] = {0};
+	bool flag = false;
+	int ret = 0;
+	long long end_sum = 0;
+	long long start_sum = 0;
+	long long diff = 0;
+	char *str;
+	const char *space_str = " ";
+	FILE *fp = fopen(PROC_SOFTIRQ, "r");
+
+	if (!fp)
+		return -1;
+
+	while (fgets(line, sizeof(line), fp)) {
+		/* the first line is skipped, only the following rows are summed */
+		if (!flag) {
+			flag = true;
+			continue;
+		}
+
+		str = strtok(line, " ");
+		if (!str) {
+			ret = -1;
+			break;
+		}
+
+		if (fwrite(space_str, strlen(space_str), 1, g_res_fp) != 1) {
+			ret = -1;
+			break;
+		}
+
+		if (fwrite(str, strlen(str), 1, g_res_fp) != 1) {
+			ret = -1;
+			break;
+		}
+
+		end_sum = 0;
+		while ((str = strtok(NULL, " \n"))) {
+			end_sum += atoll(str);
+		}
+
+		/* snapshot lines are consumed in order, one per /proc/softirqs row */
+		if (!fgets(res_line, sizeof(res_line), g_softirq_fp)) {
+			ret = -1;
+			break;
+		}
+
+		str = strtok(res_line, ":");
+		if (!str) {
+			ret = -1;
+			break;
+		}
+
+		str = strtok(NULL, "\n");
+		if (!str) {
+			ret = -1;
+			break;
+		}
+
+		start_sum = atoll(str);
+		diff = end_sum - start_sum;
+
+		snprintf(res_line, sizeof(res_line), "%lld counts/s\n", diff);
+		if (fwrite(res_line, strlen(res_line), 1, g_res_fp) != 1) {
+			ret = -1;
+			break;
+		}
+	}
+
+	fclose(fp);
+	return ret;
+}
+
+/*
+ * Without -r: write a snapshot to the -s file. With -r: read the -s snapshot
+ * and write the per-row differences to the -r file. The exit status is
+ * non-zero when a file cannot be opened or the selected calculation fails.
+ */
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	parse_arg(argc, argv);
+
+	if (!g_calc_res) {
+		g_softirq_fp = fopen(g_softirq_file, "w+");
+		if (!g_softirq_fp)
+			exit(-1);
+
+		ret = calc_softirq();
+	}
+	else {
+		g_softirq_fp = fopen(g_softirq_file, "r");
+		if (!g_softirq_fp)
+			exit(-1);
+
+		g_res_fp = fopen(g_res_file, "w+");
+		if (!g_res_fp) {
+			fclose(g_softirq_fp);
+			exit(-1);
+		}
+
+		ret = calc_softirq_speed();
+
+		/* fclose flushes buffered output; a failure means the result file is incomplete */
+		if (fclose(g_res_fp) != 0)
+			ret = -1;
+	}
+
+	if (fclose(g_softirq_fp) != 0)
+		ret = -1;
+
+	return ret ? 1 : 0;
+}
diff --git a/source/tools/detect/taskstate/Makefile b/source/tools/detect/taskstate/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..ea51b59931a842e4a34e309943b96499a6386bbc
--- /dev/null
+++ b/source/tools/detect/taskstate/Makefile
@@ -0,0 +1,5 @@
+target := taskstate
+
+mods := main.o
+
+include $(SRC)/mk/csrc.mk
diff --git a/source/tools/detect/taskstate/main.c b/source/tools/detect/taskstate/main.c
new file mode 100644
index 0000000000000000000000000000000000000000..8003fa8bdbc61dbf108938b5ea1a6e4881b886df
--- /dev/null
+++ b/source/tools/detect/taskstate/main.c
@@ -0,0 +1,228 @@
+#include <ctype.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>	/* NOTE(review): this header name was not recoverable from the patch text; confirm */
+#include <unistd.h>
+
+#define MAX_PATH_LEN 200
+#define MAX_LINE_LEN 300
+#define TASK_NAME_KEY "Name"
+#define TASK_STATE_KEY "State"
+#define TASK_RUNNING "running"
+#define TASK_UNINTERRUPT "disk sleep"
+
+static char g_rtask_file[MAX_PATH_LEN] = {0};
+static char g_dtask_file[MAX_PATH_LEN] = {0};
+static FILE *g_rtask_fp;
+static FILE *g_dtask_fp;
+
+/* Print the help text to stdout and terminate with exit(-1). */
+static void usage(void)
+{
+	fprintf(stdout,
+	"sysak taskstate: get tasks whose states are running or uninterruptible\n"
+	"options: -h, help information\n"
+	" -r file, get tasks whose states are running and output result to file\n"
+	" -d file, get tasks whose states are uninterruptible and output result to file\n");
+	exit(-1);
+}
+
+/*
+ * Parse -r/-d/-h into g_rtask_file and g_dtask_file. Both output paths are
+ * required; exits with -1 if either is missing or exceeds MAX_PATH_LEN.
+ */
+static void parse_arg(int argc, char *argv[])
+{
+	int ch;
+
+	if (argc < 2)
+		usage();
+
+	while ((ch = getopt(argc, argv, "r:d:h")) != -1) {
+		switch (ch) {
+		case 'r':
+			/* snprintf always terminates, so a repeated, shorter option cannot keep a stale tail */
+			if (optarg && (strlen(optarg) < MAX_PATH_LEN))
+				snprintf(g_rtask_file, sizeof(g_rtask_file), "%s", optarg);
+			else
+				exit(-1);
+			break;
+		case 'd':
+			if (optarg && (strlen(optarg) < MAX_PATH_LEN))
+				snprintf(g_dtask_file, sizeof(g_dtask_file), "%s", optarg);
+			else
+				exit(-1);
+			break;
+		case 'h':
+		default:
+			usage();
+			break;
+		}
+	}
+
+	if (!g_rtask_file[0] || !g_dtask_file[0])
+		exit(-1);
+}
+
+/* Copy /proc/<tid>/stack line by line to g_dtask_fp; does nothing if the file cannot be opened. */
+static void get_dtask_stack(long tid)
+{
+	char line[MAX_LINE_LEN] = {0};
+	FILE *fp;
+	char path[MAX_PATH_LEN] = {0};
+
+	snprintf(path, sizeof(path), "/proc/%ld/stack", tid);
+
+	fp = fopen(path, "r");
+	if (!fp)
+		return;
+
+	while (fgets(line, sizeof(line), fp)) {
+		fwrite(line, strlen(line), 1, g_dtask_fp);
+	}
+
+	fclose(fp);
+}
+
+/*
+ * Walk the directory entries of /proc/<pid>/task and read /proc/<tid>/status
+ * for each. A thread whose State line contains "running" is written to
+ * g_rtask_fp as its tid followed by its Name line; one whose State line
+ * contains "disk sleep" is written the same way to g_dtask_fp, followed by
+ * the contents of /proc/<tid>/stack. Unreadable status files are skipped.
+ * Returns 0 on success, -1 if the task directory cannot be opened.
+ */
+static int get_task_info(long pid)
+{
+	struct dirent *dirp;
+	DIR *dp;
+	char path[MAX_PATH_LEN] = {0};
+	char tid_str[MAX_PATH_LEN] = {0};
+	char line[MAX_LINE_LEN] = {0};
+	char task_name[MAX_LINE_LEN] = {0};
+	long tid;
+	FILE *fp;
+
+	snprintf(path, sizeof(path), "/proc/%ld/task", pid);
+
+	if (!(dp = opendir(path))) {
+		return -1;
+	}
+
+	while ((dirp = readdir(dp)) != NULL) {
+		if (dirp->d_type != DT_DIR)
+			continue;
+
+		tid = atol(dirp->d_name);
+		snprintf(path, sizeof(path), "/proc/%ld/status", tid);
+
+		fp = fopen(path, "r");
+		if (!fp)
+			continue;
+
+		/* never report a previous thread's name if this status has no Name line */
+		task_name[0] = '\0';
+
+		while (fgets(line, sizeof(line), fp)) {
+			if (strncmp(line, TASK_NAME_KEY, strlen(TASK_NAME_KEY)) == 0) {
+				snprintf(task_name, sizeof(task_name), "%s", line);
+				continue;
+			}
+
+			if (strncmp(line, TASK_STATE_KEY, strlen(TASK_STATE_KEY)) != 0)
+				continue;
+
+			if (strstr(line, TASK_RUNNING)) {
+				snprintf(tid_str, sizeof(tid_str), "%ld\n", tid);
+				fwrite(tid_str, strlen(tid_str), 1, g_rtask_fp);
+				fwrite(task_name, strlen(task_name), 1, g_rtask_fp);
+			}
+			else if (strstr(line, TASK_UNINTERRUPT)) {
+				snprintf(tid_str, sizeof(tid_str), "%ld\n", tid);
+				fwrite(tid_str, strlen(tid_str), 1, g_dtask_fp);
+				fwrite(task_name, strlen(task_name), 1, g_dtask_fp);
+				get_dtask_stack(tid);
+			}
+
+			/* the State line decides the outcome; the rest of the file is not read */
+			break;
+		}
+
+		fclose(fp);
+	}
+
+	closedir(dp);
+	return 0;
+}
+
+/*
+ * Iterate over the directory entries of /proc whose names consist only of
+ * digits and report the threads of each one through get_task_info().
+ * Returns 0 on success, -1 if /proc cannot be opened.
+ */
+static int scan_task(void)
+{
+	struct dirent *dirp;
+	DIR *dp;
+	int i;
+	long pid;
+
+	if (!(dp = opendir("/proc"))) {
+		return -1;
+	}
+
+	while ((dirp = readdir(dp)) != NULL) {
+		if (dirp->d_type != DT_DIR)
+			continue;
+
+		/* ctype functions take an unsigned char value; a negative char is undefined */
+		for (i = 0; dirp->d_name[i] != 0; ++i) {
+			if (!isdigit((unsigned char)dirp->d_name[i]))
+				break;
+		}
+
+		/* the scan stopped before the terminator: the name is not a pid */
+		if (dirp->d_name[i] != 0)
+			continue;
+
+		pid = atol(dirp->d_name);
+
+		get_task_info(pid);
+	}
+
+	closedir(dp);
+	return 0;
+}
+
+/*
+ * Open both output files, scan every task, then close the files. The exit
+ * status is non-zero when a file cannot be opened or flushed, or when /proc
+ * cannot be scanned.
+ */
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	parse_arg(argc, argv);
+
+	g_rtask_fp = fopen(g_rtask_file, "w+");
+	if (!g_rtask_fp) {
+		exit(-1);
+	}
+
+	g_dtask_fp = fopen(g_dtask_file, "w+");
+	if (!g_dtask_fp) {
+		fclose(g_rtask_fp);
+		exit(-1);
+	}
+
+	ret = scan_task();
+
+	if (fclose(g_dtask_fp) != 0)
+		ret = -1;
+	if (fclose(g_rtask_fp) != 0)
+		ret = -1;
+
+	return ret ? 1 : 0;
+}