From 024f1755e994286c47e61ea3cdf41151594059f3 Mon Sep 17 00:00:00 2001 From: ZouTao Date: Thu, 25 May 2023 16:40:04 +0800 Subject: [PATCH 1/5] tasktop: support collect the stack of process with the status D --- source/tools/detect/sched/tasktop/tasktop.c | 195 ++++++++++++++------ 1 file changed, 138 insertions(+), 57 deletions(-) diff --git a/source/tools/detect/sched/tasktop/tasktop.c b/source/tools/detect/sched/tasktop/tasktop.c index 37a92cec..69e8b826 100644 --- a/source/tools/detect/sched/tasktop/tasktop.c +++ b/source/tools/detect/sched/tasktop/tasktop.c @@ -224,6 +224,62 @@ int swap(void* lhs, void* rhs, size_t sz) { return 0; } +static bool is_D(pid_t pid, pid_t tid) { + int res = false; + char path[FILE_PATH_LEN]; + struct proc_stat_t proc_info; + + snprintf(path, FILE_PATH_LEN, "/proc/%d/task/%d/stat", pid, tid); + FILE* fp = fopen(path, "r"); + if (!fp) { + return res; + } + + fscanf(fp, "%d %s %c", &proc_info.pid, proc_info.comm, &proc_info.state); + if (proc_info.state == 'D') res = true; + + fclose(fp); + return res; +} + +static int read_stack(pid_t pid, pid_t tid) { + int err = 0; + char stack_path[FILE_PATH_LEN]; + snprintf(stack_path, FILE_PATH_LEN, "/proc/%d/task/%d/stack", pid, tid); + FILE* fp = fopen(stack_path, "r"); + if (!fp) { + /* may be thread is exited */ + err = errno; + goto cleanup; + } + + char buf[1024]; + fprintf(stderr, "path=%s\n", stack_path); + while (fgets(buf, 1024, fp)) { + fprintf(stderr, "%s", buf); + } + fprintf(stderr, "\n"); + +cleanup: + if (fp) fclose(fp); + return err; +} + +static int read_d_task(struct id_pair_t* pids, int nr_thread) { + int i = 0; + int err = 0; + for (i = 0; i < nr_thread; i++) { + int pid = pids[i].pid; + int tid = pids[i].tid; + + if (is_D(pid, tid)) { + read_stack(pid, tid); + } + } + + return err; +} + static int read_sched_delay(struct sys_record_t* sys_rec) { FILE* fp = fopen(SCHEDSTAT_PATH, "r"); int err = 0; @@ -307,6 +363,7 @@ static int read_cgroup_throttle() { return err; } + static int read_stat(struct sys_cputime_t* prev_sys, struct sys_cputime_t* now_sys, struct sys_record_t* sys_rec) { @@ -377,7 +434,7 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { DIR* dir = NULL; DIR* task_dir = NULL; - u_int64_t proc_num = 0; + u_int64_t nr_thread = 0; struct dirent* proc_de = NULL; struct dirent* task_de = NULL; long val; @@ -397,45 +454,44 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { if (err) continue; pid = val; - if (!env.thread_mode) { - pids[proc_num].pid = pid; - pids[proc_num++].tid = -1; - } else { - char taskpath[FILE_PATH_LEN]; - snprintf(taskpath, FILE_PATH_LEN, "/proc/%d/task", pid); - task_dir = opendir(taskpath); - if (!task_dir) { - if (errno == ENOENT) { - continue; - } - err = errno; - goto cleanup; + // if (!env.thread_mode) { + // pids[proc_num].pid = pid; + // pids[proc_num++].tid = -1; + // } else { + char taskpath[FILE_PATH_LEN]; + snprintf(taskpath, FILE_PATH_LEN, "/proc/%d/task", pid); + task_dir = opendir(taskpath); + if (!task_dir) { + if (errno == ENOENT) { + continue; } + err = errno; + goto cleanup; + } - while ((task_de = readdir(task_dir)) != NULL) { - if (task_de->d_type != DT_DIR || - !strcmp(task_de->d_name, ".") || - !strcmp(task_de->d_name, "..")) - continue; - err = parse_long(task_de->d_name, &val); - - if (err) { - fprintf(stderr, "Failed parse tid\n"); - goto cleanup; - } - tid = val; + while ((task_de = readdir(task_dir)) != NULL) { + if (task_de->d_type != DT_DIR || !strcmp(task_de->d_name, ".") || + !strcmp(task_de->d_name, "..")) + continue; + err = parse_long(task_de->d_name, &val); - pids[proc_num].pid = pid; - pids[proc_num++].tid = tid; + if (err) { + fprintf(stderr, "Failed parse tid\n"); + goto cleanup; } + tid = val; - if (task_dir) { - closedir(task_dir); - task_dir = NULL; - } + pids[nr_thread].pid = pid; + pids[nr_thread++].tid = tid; + } + + if (task_dir) { + closedir(task_dir); + task_dir = NULL; } + // } } - *num = proc_num; + *num = nr_thread; cleanup: if (dir) closedir(dir); if (task_dir) closedir(task_dir); @@ -450,10 +506,13 @@ static int read_proc(pid_t pid, pid_t tid, struct task_cputime_t** prev, FILE* fp = 0; int err = 0; - if (tid != -1) { + /* tid > 0: tid valid */ + /* tid < 0 0: tid ignored */ + if (tid > 0) { snprintf(proc_path, FILE_PATH_LEN, "/proc/%d/task/%d/stat", pid, tid); pid = tid; } else { + /* tid < 0 means env is the process mode */ snprintf(proc_path, FILE_PATH_LEN, "/proc/%d/stat", pid); } @@ -528,12 +587,12 @@ cleanup: return err; } -static void sort_records(struct record_t* rec, int proc_num, +static void sort_records(struct record_t* rec, int rec_num, enum sort_type sort) { struct task_record_t** records = rec->tasks; int i, j; - for (i = 0; i < proc_num; i++) { - for (j = i + 1; j < proc_num; j++) { + for (i = 0; i < rec_num; i++) { + for (j = i + 1; j < rec_num; j++) { if (!records[j] && !records[i]) { continue; } else if (records[i] && !records[j]) { @@ -612,7 +671,7 @@ static char* second2str(time_t ts, char* buf, int size) { return buf; } -static void output(struct record_t* rec, int proc_num, FILE* dest) { +static void output(struct record_t* rec, int rec_num, FILE* dest) { struct task_record_t** records = rec->tasks; struct sys_record_t* sys = &rec->sys; struct proc_fork_info_t* info = &(sys->most_fork_info); @@ -642,7 +701,7 @@ static void output(struct record_t* rec, int proc_num, FILE* dest) { sys->percpu_sched_delay[i - 1]); } #endif - for (i = 0; i < proc_num; i++) { + for (i = 0; i < rec_num; i++) { if (!records[i]) break; if (env.human) { @@ -679,7 +738,7 @@ static void output(struct record_t* rec, int proc_num, FILE* dest) { fflush(dest); } -static void now_to_prev(struct id_pair_t* pids, int proc_num, int pidmax, +static void now_to_prev(struct id_pair_t* pids, int nr_thread, int pidmax, struct task_cputime_t** prev_task, struct task_cputime_t** now_task, struct sys_cputime_t* prev_sys, @@ -692,25 +751,31 @@ static void now_to_prev(struct id_pair_t* pids, int proc_num, int pidmax, } } - for (i = 0; i < proc_num; i++) { + for (i = 0; i < nr_thread; i++) { int pid; if (env.thread_mode) pid = pids[i].tid; - else + else { + /* only move once */ + if (pids[i].pid != pids[i].tid) continue; pid = pids[i].pid; + } + swap(&prev_task[pid], &now_task[pid], sizeof(struct task_cputime_t*)); } swap(prev_sys, now_sys, sizeof(struct sys_cputime_t) * (nr_cpu + 1)); } -static int make_records(struct id_pair_t* pids, int proc_num, +static int make_records(struct id_pair_t* pids, int nr_thread, struct record_t* rec, struct task_cputime_t** prev_task, - struct task_cputime_t** now_task) { + struct task_cputime_t** now_task, int* rec_num) { struct task_record_t** records = rec->tasks; int err = 0; u_int64_t i; - for (i = 0; i < proc_num; i++) { + int nr_rec = 0; + + for (i = 0; i < nr_thread; i++) { struct id_pair_t* id = &pids[i]; if (env.tid != -1) { @@ -721,20 +786,31 @@ static int make_records(struct id_pair_t* pids, int proc_num, } } - err = read_proc(id->pid, id->tid, prev_task, now_task, &records[i]); + /* many pair with the same pid, in process mode skip the trival read */ + if (!env.thread_mode && id->pid != id->tid) continue; + + if (env.thread_mode) { + err = read_proc(id->pid, id->tid, prev_task, now_task, + &records[nr_rec++]); + } else { + err = + read_proc(id->pid, -1, prev_task, now_task, &records[nr_rec++]); + } + if (err) { fprintf(stderr, "Failed read proc\n"); return err; } } + *rec_num = nr_rec; return err; } -static void free_records(struct record_t* rec, int proc_num) { +static void free_records(struct record_t* rec, int nr_thread) { struct task_record_t** records = rec->tasks; int i; - for (i = 0; i < proc_num; i++) { + for (i = 0; i < nr_thread; i++) { if (records[i]) free(records[i]); } free(records); @@ -900,35 +976,40 @@ int main(int argc, char** argv) { read_stat(prev_sys, now_sys, &rec->sys); /* get all process now */ - u_int64_t proc_num; - err = read_all_pids(pids, &proc_num); + u_int64_t nr_thread = 0; + int rec_num = 0; + + err = read_all_pids(pids, &nr_thread); if (err) { fprintf(stderr, "Failed read all pids.\n"); goto cleanup; } - printf("procnum=%lu\n", proc_num); - rec->tasks = calloc(proc_num, sizeof(struct task_record_t*)); + printf("nr_thread=%lu\n", nr_thread); + + read_d_task(pids, nr_thread); + + rec->tasks = calloc(nr_thread, sizeof(struct task_record_t*)); /* if prev process info exist produce record*/ - err = make_records(pids, proc_num, rec, prev_task, now_task); + err = make_records(pids, nr_thread, rec, prev_task, now_task, &rec_num); if (err) { fprintf(stderr, "Failed make records.\n"); goto cleanup; } /* sort record by sort type */ - sort_records(rec, proc_num, env.rec_sort); + sort_records(rec, rec_num, env.rec_sort); /* output record */ if (!first) - output(rec, proc_num, stat_log); + output(rec, rec_num, stat_log); else first = false; - free_records(rec, proc_num); + free_records(rec, nr_thread); /* update old info and free nonexist process info */ - now_to_prev(pids, proc_num, pidmax, prev_task, now_task, prev_sys, + now_to_prev(pids, nr_thread, pidmax, prev_task, now_task, prev_sys, now_sys); if (env.nr_iter) sleep(env.delay); -- Gitee From ad96eff18878f4b665abfbc84bcf073dabb25cb8 Mon Sep 17 00:00:00 2001 From: ZouTao Date: Mon, 29 May 2023 16:40:41 +0800 Subject: [PATCH 2/5] tasktop: fix fd resource leak error. --- source/tools/detect/sched/tasktop/procstate.c | 1 + source/tools/detect/sched/tasktop/tasktop.c | 410 ++++++++++++------ source/tools/detect/sched/tasktop/tasktop.h | 21 +- .../sched/tasktop/tasktopSelftest/test.c | 18 + 4 files changed, 318 insertions(+), 132 deletions(-) diff --git a/source/tools/detect/sched/tasktop/procstate.c b/source/tools/detect/sched/tasktop/procstate.c index a9603f06..4e9667ae 100644 --- a/source/tools/detect/sched/tasktop/procstate.c +++ b/source/tools/detect/sched/tasktop/procstate.c @@ -26,6 +26,7 @@ int runnable_proc(struct sys_record_t *sys) { sys->load1 = avg.load1; sys->nr_R = avg.nr_running; // printf("load1 = %.2f load5 = %.2f load15 = %.2f\n", avg.load1, avg.load5, avg.load15); + fclose(fp); return err; } diff --git a/source/tools/detect/sched/tasktop/tasktop.c b/source/tools/detect/sched/tasktop/tasktop.c index 69e8b826..e685782f 100644 --- a/source/tools/detect/sched/tasktop/tasktop.c +++ b/source/tools/detect/sched/tasktop/tasktop.c @@ -20,7 +20,8 @@ #include "tasktop.h" #include "common.h" -#define DEBUG +// #define DEBUG +// #define ONLY_THREAD char log_dir[FILE_PATH_LEN] = "/var/log/sysak/tasktop"; char default_log_path[FILE_PATH_LEN] = "/var/log/sysak/tasktop/tasktop.log"; @@ -39,13 +40,17 @@ struct env { enum sort_type rec_sort; int limit; bool human; + int stack_limit; + int cgroup_limit; } env = {.thread_mode = false, .delay = 3, .tid = -1, .human = false, .rec_sort = SORT_CPU, .nr_iter = LONG_MAX - 1, - .limit = INT_MAX}; + .limit = INT_MAX, + .stack_limit = 20, + .cgroup_limit = 20}; const char* argp_program_version = "tasktop 0.1"; const char argp_program_doc[] = @@ -64,8 +69,9 @@ const char argp_program_doc[] = " tasktop -d 5 # modify the sample interval.\n" " tasktop -i 3 # output 3 times then exit.\n" " tasktop -l 20 # limit the records number no more than 20.\n" - " tasktop -f a.log # log to a.log (default to " - "/var/log/sysak/tasktop/tasktop.log)\n"; + " tasktop -e 10 # limit the d-stack no more than 10, default is " + "20.\n" + " tasktop -f a.log # log to a.log \n"; static const struct argp_option opts[] = { {"human", 'H', 0, 0, "Output human-readable time info."}, @@ -78,11 +84,22 @@ static const struct argp_option opts[] = { {"sort", 's', "SORT", 0, "Sort the result, available options are user, sys and cpu, default is " "cpu"}, - {"limit", 'l', "LIMIT", 0, "Specify the top-LIMIT tasks to display"}, + {"r-limit", 'l', "LIMIT", 0, "Specify the top R-LIMIT tasks to display"}, + {"d-limit", 'e', "STACK-LIMIT", 0, + "Specify the STACK-LIMIT D tasks's stack to display"}, + {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"}, {}, }; +/* PROCESS MODE + /proc/pid/stat -- calculate process cpu util + /proc/pid/task/tid/stat -- check task state, if d read the stack */ + +/* THREAD MODE +/proc/pid/task/tid/stat -- calcualte thread cpu util +/proc/pid/task/tid/sat -- read task state, if d read stack +*/ static int prepare_directory(char* path) { int ret; @@ -172,6 +189,14 @@ static error_t parse_arg(int key, char* arg, struct argp_state* state) { } env.limit = val; break; + case 'e': + err = parse_long(arg, &val); + if (err || val <= 0) { + fprintf(stderr, "Failed parse d-stack limit.\n"); + argp_usage(state); + } + env.stack_limit = val; + break; case 'H': env.human = true; break; @@ -224,10 +249,9 @@ int swap(void* lhs, void* rhs, size_t sz) { return 0; } -static bool is_D(pid_t pid, pid_t tid) { +static bool is_D(pid_t pid, pid_t tid, D_task_record_t* t_rec) { int res = false; char path[FILE_PATH_LEN]; - struct proc_stat_t proc_info; snprintf(path, FILE_PATH_LEN, "/proc/%d/task/%d/stat", pid, tid); FILE* fp = fopen(path, "r"); @@ -235,14 +259,27 @@ static bool is_D(pid_t pid, pid_t tid) { return res; } - fscanf(fp, "%d %s %c", &proc_info.pid, proc_info.comm, &proc_info.state); - if (proc_info.state == 'D') res = true; + t_rec->pid = pid; + memset(t_rec->comm, 0, sizeof(t_rec->comm)); + fscanf(fp, "%d %s", &t_rec->tid, t_rec->comm); + + /* process the situation comm contains space,eg. comm=(Signal Dispatch) */ + while (true) { + int len = strlen(t_rec->comm); + if (t_rec->comm[len - 1] == ')') break; + fscanf(fp, "%s", t_rec->comm + len); + } + + char state; + fscanf(fp, " %c", &state); + + if (state == 'D') res = true; fclose(fp); return res; } -static int read_stack(pid_t pid, pid_t tid) { +static int read_stack(pid_t pid, pid_t tid, D_task_record_t* t_rec) { int err = 0; char stack_path[FILE_PATH_LEN]; snprintf(stack_path, FILE_PATH_LEN, "/proc/%d/task/%d/stack", pid, tid); @@ -252,30 +289,44 @@ static int read_stack(pid_t pid, pid_t tid) { err = errno; goto cleanup; } - - char buf[1024]; - fprintf(stderr, "path=%s\n", stack_path); - while (fgets(buf, 1024, fp)) { - fprintf(stderr, "%s", buf); - } - fprintf(stderr, "\n"); + memset(t_rec->stack, 0, sizeof(t_rec->stack)); + fread(t_rec->stack, STACK_CONTENT_LEN, 1, fp); cleanup: if (fp) fclose(fp); return err; } -static int read_d_task(struct id_pair_t* pids, int nr_thread) { +static int read_d_task(struct id_pair_t* pids, int nr_thread, int* stack_num, + struct D_task_record_t* d_tasks) { int i = 0; int err = 0; + +#ifdef DEBUG + struct timeval start, end; + err = gettimeofday(&start, 0); + if (err) fprintf(stderr, "read start time error.\n"); +#endif + + int d_num = 0; for (i = 0; i < nr_thread; i++) { + if (d_num >= env.stack_limit) break; int pid = pids[i].pid; int tid = pids[i].tid; - if (is_D(pid, tid)) { - read_stack(pid, tid); + if (is_D(pid, tid, d_tasks + d_num)) { + read_stack(pid, tid, d_tasks + d_num); + d_num++; } } + *stack_num = d_num; + +#ifdef DEBUG + err = gettimeofday(&end, 0); + if (err) fprintf(stderr, "read end time error.\n"); + fprintf(stderr, "read %d thread user %lds %ldus.\n", nr_thread, + end.tv_sec - start.tv_sec, end.tv_usec - start.tv_usec); +#endif return err; } @@ -313,59 +364,68 @@ cleanup: return 0; } -static int read_cgroup_throttle() { +static int read_cgroup_throttle(cgroup_cpu_stat_t* cgroups, int* cgroup_num) { #define CGROUP_PATH "/sys/fs/cgroup/cpu" int err = 0; - DIR* root_dir = opendir(CGROUP_PATH); struct dirent* dir = 0; + int c_num = 0; + + DIR* root_dir = opendir(CGROUP_PATH); + if (!root_dir) { + fprintf(stderr, "Failed open %s\n", CGROUP_PATH); + goto cleanup; + } + while ((dir = readdir(root_dir)) != NULL) { + char name[128]; + unsigned long long val = 0; + + if (c_num >= env.cgroup_limit) break; + if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..") || dir->d_type != DT_DIR) { continue; } + char stat_path[BUF_SIZE]; snprintf(stat_path, BUF_SIZE, "%s/%s/cpu.stat", CGROUP_PATH, dir->d_name); - cgroup_cpu_stat_t stat; - - memset(&stat, 0, sizeof(cgroup_cpu_stat_t)); - char name[128]; - unsigned long long val = 0; FILE* fp = fopen(stat_path, "r"); if (!fp) { fprintf(stderr, "Failed open cpu.stat[%s].\n", stat_path); - err = errno; - return err; + continue; } + cgroup_cpu_stat_t* stat = cgroups + c_num; + memset(stat->cgroup_name, 0, sizeof(stat->cgroup_name)); + + strncpy(stat->cgroup_name, dir->d_name, sizeof(stat->cgroup_name) - 1); while (fscanf(fp, "%s %llu", name, &val) != EOF) { if (!strcmp(name, "nr_periods")) { - stat.nr_periods = val; + stat->nr_periods = val; } else if (!strcmp(name, "nr_throttled")) { - stat.nr_throttled = val; + stat->nr_throttled = val; } else if (!strcmp(name, "throttled_time")) { - stat.throttled_time = val; + stat->throttled_time = val; } else if (!strcmp(name, "nr_burst")) { - stat.nr_burst = val; + stat->nr_burst = val; } else if (!strcmp(name, "burst_time")) { - stat.burst_time = val; + stat->burst_time = val; } } -#ifdef DEBUG - fprintf(stderr, - "[%-30s] nr_periods=%d nr_throttled=%d throttled_time=%llu " - "nr_burst=%d burst_time=%llu\n", - stat_path, stat.nr_periods, stat.nr_throttled, - stat.throttled_time, stat.nr_burst, stat.burst_time); -#endif + if (stat->nr_throttled > 0) c_num++; + fclose(fp); } +cleanup: + if (root_dir) closedir(root_dir); + *cgroup_num = c_num; return err; } -static int read_stat(struct sys_cputime_t* prev_sys, - struct sys_cputime_t* now_sys, +static int read_stat(struct sys_cputime_t** prev_sys, + struct sys_cputime_t** now_sys, struct sys_record_t* sys_rec) { int err = 0; int i = 0; @@ -375,25 +435,26 @@ static int read_stat(struct sys_cputime_t* prev_sys, err = errno; goto cleanup; } + for (i = 0; i <= nr_cpu; i++) { /*now only read first line, maybe future will read more info*/ fscanf(fp, "%s %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", - now_sys[i].cpu, &now_sys[i].usr, &now_sys[i].nice, - &now_sys[i].sys, &now_sys[i].idle, &now_sys[i].iowait, - &now_sys[i].irq, &now_sys[i].softirq, &now_sys[i].steal, - &now_sys[i].guest, &now_sys[i].guest_nice); - - if (prev_sys[i].usr == 0) continue; - - int now_time = now_sys[i].usr + now_sys[i].sys + now_sys[i].nice + - now_sys[i].idle + now_sys[i].iowait + now_sys[i].irq + - now_sys[i].softirq + now_sys[i].steal + - now_sys[i].guest + now_sys[i].guest_nice; - int prev_time = prev_sys[i].usr + prev_sys[i].sys + prev_sys[i].nice + - prev_sys[i].idle + prev_sys[i].iowait + - prev_sys[i].irq + prev_sys[i].softirq + - prev_sys[i].steal + prev_sys[i].guest + - prev_sys[i].guest_nice; + now_sys[i]->cpu, &now_sys[i]->usr, &now_sys[i]->nice, + &now_sys[i]->sys, &now_sys[i]->idle, &now_sys[i]->iowait, + &now_sys[i]->irq, &now_sys[i]->softirq, &now_sys[i]->steal, + &now_sys[i]->guest, &now_sys[i]->guest_nice); + + if (prev_sys[i]->usr == 0) continue; + + int now_time = now_sys[i]->usr + now_sys[i]->sys + now_sys[i]->nice + + now_sys[i]->idle + now_sys[i]->iowait + now_sys[i]->irq + + now_sys[i]->softirq + now_sys[i]->steal + + now_sys[i]->guest + now_sys[i]->guest_nice; + int prev_time = prev_sys[i]->usr + prev_sys[i]->sys + + prev_sys[i]->nice + prev_sys[i]->idle + + prev_sys[i]->iowait + prev_sys[i]->irq + + prev_sys[i]->softirq + prev_sys[i]->steal + + prev_sys[i]->guest + prev_sys[i]->guest_nice; int all_time = now_time - prev_time; // int all_time = (sysconf(_SC_NPROCESSORS_ONLN) * env.delay * // sysconf(_SC_CLK_TCK)); @@ -403,11 +464,11 @@ static int read_stat(struct sys_cputime_t* prev_sys, * sched delay occur , the sum of cpu rates more than 100%. */ sys_rec->cpu[i].usr = - (double)(now_sys[i].usr - prev_sys[i].usr) * 100 / all_time; + (double)(now_sys[i]->usr - prev_sys[i]->usr) * 100 / all_time; sys_rec->cpu[i].sys = - (double)(now_sys[i].sys - prev_sys[i].sys) * 100 / all_time; + (double)(now_sys[i]->sys - prev_sys[i]->sys) * 100 / all_time; sys_rec->cpu[i].iowait = - (double)(now_sys[i].iowait - prev_sys[i].iowait) * 100 / all_time; + (double)(now_sys[i]->iowait - prev_sys[i]->iowait) * 100 / all_time; } cleanup: if (fp) fclose(fp); @@ -454,10 +515,7 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { if (err) continue; pid = val; - // if (!env.thread_mode) { - // pids[proc_num].pid = pid; - // pids[proc_num++].tid = -1; - // } else { + char taskpath[FILE_PATH_LEN]; snprintf(taskpath, FILE_PATH_LEN, "/proc/%d/task", pid); task_dir = opendir(taskpath); @@ -477,6 +535,7 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { if (err) { fprintf(stderr, "Failed parse tid\n"); + goto cleanup; } tid = val; @@ -489,7 +548,6 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { closedir(task_dir); task_dir = NULL; } - // } } *num = nr_thread; cleanup: @@ -499,7 +557,8 @@ cleanup: } static int read_proc(pid_t pid, pid_t tid, struct task_cputime_t** prev, - struct task_cputime_t** now, struct task_record_t** rec) { + struct task_cputime_t** now, + struct R_task_record_t** rec) { struct proc_stat_t proc_info; char proc_path[FILE_PATH_LEN]; struct task_cputime_t* data; @@ -537,17 +596,25 @@ static int read_proc(pid_t pid, pid_t tid, struct task_cputime_t** prev, goto cleanup; } + fscanf(fp, "%d %s", &proc_info.pid, proc_info.comm); + + /* process the situation comm contains space,eg. comm=(Signal Dispatch) */ + while (true) { + int len = strlen(proc_info.comm); + if (proc_info.comm[len - 1] == ')') break; + fscanf(fp, " %s", proc_info.comm + len); + } + fscanf(fp, - "%d %s %c %d %d %d %d %d %u %lu %lu %lu %lu %lu %lu %ld %ld %ld " + " %c %d %d %d %d %d %u %lu %lu %lu %lu %lu %lu %ld %ld %ld " "%ld %ld %ld %llu", - &proc_info.pid, &proc_info.comm[0], &proc_info.state, - &proc_info.ppid, &proc_info.pgrp, &proc_info.session, - &proc_info.tty_nr, &proc_info.tpgid, &proc_info.flags, - &proc_info.minflt, &proc_info.cminflt, &proc_info.majflt, - &proc_info.cmajflt, &proc_info.utime, &proc_info.stime, - &proc_info.cutime, &proc_info.cstime, &proc_info.priority, - &proc_info.nice, &proc_info.num_threads, &proc_info.itrealvalue, - &proc_info.starttime); + &proc_info.state, &proc_info.ppid, &proc_info.pgrp, + &proc_info.session, &proc_info.tty_nr, &proc_info.tpgid, + &proc_info.flags, &proc_info.minflt, &proc_info.cminflt, + &proc_info.majflt, &proc_info.cmajflt, &proc_info.utime, + &proc_info.stime, &proc_info.cutime, &proc_info.cstime, + &proc_info.priority, &proc_info.nice, &proc_info.num_threads, + &proc_info.itrealvalue, &proc_info.starttime); data->utime = proc_info.utime; data->stime = proc_info.stime; @@ -568,7 +635,7 @@ static int read_proc(pid_t pid, pid_t tid, struct task_cputime_t** prev, if (base != 0) { /* only process cpu utilization > 0 */ if (udelta + sdelta > 0) { - *rec = calloc(1, sizeof(struct task_record_t)); + *rec = calloc(1, sizeof(struct R_task_record_t)); (*rec)->pid = now[pid]->pid; (*rec)->ppid = now[pid]->ppid; (*rec)->runtime = run_time; @@ -589,7 +656,7 @@ cleanup: static void sort_records(struct record_t* rec, int rec_num, enum sort_type sort) { - struct task_record_t** records = rec->tasks; + struct R_task_record_t** records = rec->r_tasks; int i, j; for (i = 0; i < rec_num; i++) { for (j = i + 1; j < rec_num; j++) { @@ -598,7 +665,7 @@ static void sort_records(struct record_t* rec, int rec_num, } else if (records[i] && !records[j]) { continue; } else if (!records[i] && records[j]) { - swap(&records[i], &records[j], sizeof(struct task_record_t*)); + swap(&records[i], &records[j], sizeof(struct R_task_record_t*)); } else { double lth, rth; switch (sort) { @@ -621,7 +688,7 @@ static void sort_records(struct record_t* rec, int rec_num, if (lth < rth) { swap(&records[i], &records[j], - sizeof(struct task_record_t*)); + sizeof(struct R_task_record_t*)); } } } @@ -671,18 +738,16 @@ static char* second2str(time_t ts, char* buf, int size) { return buf; } -static void output(struct record_t* rec, int rec_num, FILE* dest) { - struct task_record_t** records = rec->tasks; - struct sys_record_t* sys = &rec->sys; - struct proc_fork_info_t* info = &(sys->most_fork_info); +static void output_ts(FILE* dest) { char stime_str[BUF_SIZE] = {0}; - char rtime_str[BUF_SIZE] = {0}; - time_t now = time(0); - int i = 0; + fprintf(dest, "[TIME-STAMP] %s\n", ts2str(now, stime_str, BUF_SIZE)); +} - fprintf(dest, "%s\n", ts2str(now, stime_str, BUF_SIZE)); - fprintf(dest, "UTIL&LOAD\n"); +static void output_sys_load(struct record_t* rec, FILE* dest) { + struct sys_record_t* sys = &rec->sys; + struct proc_fork_info_t* info = &(sys->most_fork_info); + fprintf(dest, "[UTIL&LOAD]\n"); fprintf(dest, "%6s %6s %6s %6s %6s %6s %6s :%5s \n", "usr", "sys", "iowait", "load1", "R", "D", "fork", "proc"); @@ -691,22 +756,55 @@ static void output(struct record_t* rec, int rec_num, FILE* dest) { sys->nr_D, sys->nr_fork); fprintf(dest, " : %s(%d) ppid=%d cnt=%lu \n", info->comm, info->pid, info->ppid, info->fork); +} -#ifdef DEBUG - fprintf(dest, "[ cpu ] %6s %6s %6s %10s\n", "usr", "sys", "iowait", +static void output_per_cpu(struct record_t* rec, FILE* dest) { + int i; + struct sys_record_t* sys = &rec->sys; + + fprintf(dest, "[PER_CPU]\n"); + fprintf(dest, "%7s %6s %6s %6s %10s\n", "cpu", "usr", "sys", "iowait", "delay(ns)"); for (i = 1; i <= nr_cpu; i++) { - fprintf(dest, "[cpu-%d] %6.1f %6.1f %6.1f %10llu\n", i - 1, + char cpu_name[10]; + snprintf(cpu_name, 10, "cpu-%d", i - 1); + fprintf(dest, "%7s %6.1f %6.1f %6.1f %10llu\n", cpu_name, sys->cpu[i].usr, sys->cpu[i].sys, sys->cpu[i].iowait, sys->percpu_sched_delay[i - 1]); } -#endif +} + +static void output_cgroup(struct record_t* rec, int cgroup_num, FILE* dest) { + cgroup_cpu_stat_t* cgroups = rec->cgroups; + + int i = 0; + + for (i = 0; i < cgroup_num; i++) { + if (i == 0) { + fprintf(dest, "[CGROUP]\n"); + fprintf(dest, "%20s %15s %15s %15s %15s %15s\n", "cgroup_name", + "nr_periods", "nr_throttled", "throttled_time", "nr_burst", + "burst_time"); + } + fprintf(dest, "%20s %15d %15d %15llu %15d %15llu\n", + cgroups[i].cgroup_name, cgroups[i].nr_periods, + cgroups[i].nr_throttled, cgroups[i].throttled_time, + cgroups[i].nr_burst, cgroups[i].burst_time); + } +} + +static void output_tasktop(struct record_t* rec, int rec_num, FILE* dest) { + struct R_task_record_t** records = rec->r_tasks; + int i; + char rtime_str[BUF_SIZE] = {0}; + char stime_str[BUF_SIZE] = {0}; + for (i = 0; i < rec_num; i++) { if (!records[i]) break; if (env.human) { if (i == 0) { - fprintf(dest, "TASKTOP\n"); + fprintf(dest, "[TASKTOP]\n"); fprintf(dest, "%18s %6s %6s %20s %15s %6s %6s %6s\n", "COMMAND", "PID", "PPID", "START", "RUN", "%UTIME", "%STIME", "%CPU"); @@ -721,7 +819,7 @@ static void output(struct record_t* rec, int rec_num, FILE* dest) { records[i]->all_cpu_rate); } else { if (i == 0) { - fprintf(dest, "TASKTOP\n"); + fprintf(dest, "[TASKTOP]\n"); fprintf(dest, "%18s %6s %6s %10s %10s %6s %6s %6s\n", "COMMAND", "PID", "PPID", "START", "RUN", "%UTIME", "%STIME", "%CPU"); @@ -735,14 +833,51 @@ static void output(struct record_t* rec, int rec_num, FILE* dest) { records[i]->all_cpu_rate); } } +} + +static void output_d_stack(struct record_t* rec, int d_num, FILE* dest) { + int i; + struct D_task_record_t* d_tasks = rec->d_tasks; + for (i = 0; i < d_num; i++) { + if (i == 0) { + fprintf(dest, "[D-STASK]\n"); + fprintf(dest, "%18s %6s %6s %6s\n", "COMMAND", "PID", "PPID", + "STACK"); + } + fprintf(dest, "%18s %6d %6d ", d_tasks[i].comm, d_tasks[i].tid, + d_tasks[i].pid); + + char* str = d_tasks[i].stack; + const char delim[2] = "\n"; + char* token; + + token = strtok(str, delim); + fprintf(dest, "%s\n", token); + + while (token) { + fprintf(dest, "%18s %6s %6s %s\n", "", "", "", token); + token = strtok(NULL, delim); + } + } +} + +static void output(struct record_t* rec, int rec_num, FILE* dest, int d_num, + int cgroup_num) { + output_ts(dest); + output_sys_load(rec, dest); + output_per_cpu(rec, dest); + output_cgroup(rec, cgroup_num, dest); + output_tasktop(rec, rec_num, dest); + output_d_stack(rec, d_num, dest); + fflush(dest); } static void now_to_prev(struct id_pair_t* pids, int nr_thread, int pidmax, struct task_cputime_t** prev_task, struct task_cputime_t** now_task, - struct sys_cputime_t* prev_sys, - struct sys_cputime_t* now_sys) { + struct sys_cputime_t** prev_sys, + struct sys_cputime_t** now_sys) { int i; for (i = 0; i < pidmax; i++) { if (prev_task[i]) { @@ -764,13 +899,15 @@ static void now_to_prev(struct id_pair_t* pids, int nr_thread, int pidmax, swap(&prev_task[pid], &now_task[pid], sizeof(struct task_cputime_t*)); } - swap(prev_sys, now_sys, sizeof(struct sys_cputime_t) * (nr_cpu + 1)); + for (i = 0; i <= nr_cpu; i++) { + swap(&prev_sys[i], &now_sys[i], sizeof(struct sys_cputime_t*)); + } } static int make_records(struct id_pair_t* pids, int nr_thread, struct record_t* rec, struct task_cputime_t** prev_task, struct task_cputime_t** now_task, int* rec_num) { - struct task_record_t** records = rec->tasks; + struct R_task_record_t** records = rec->r_tasks; int err = 0; u_int64_t i; int nr_rec = 0; @@ -808,7 +945,7 @@ static int make_records(struct id_pair_t* pids, int nr_thread, } static void free_records(struct record_t* rec, int nr_thread) { - struct task_record_t** records = rec->tasks; + R_task_record_t** records = rec->r_tasks; int i; for (i = 0; i < nr_thread; i++) { if (records[i]) free(records[i]); @@ -862,7 +999,7 @@ static int check_fork(int fork_map_fd, struct sys_record_t* sys_rec) { if (!next_key) continue; - total = total + info.fork; // for debug + total = total + info.fork; if (max_fork < info.fork) { max_fork = info.fork; @@ -876,16 +1013,14 @@ static int check_fork(int fork_map_fd, struct sys_record_t* sys_rec) { static void sigint_handler(int signo) { exiting = 1; } int main(int argc, char** argv) { - int err = 0; - int fork_map_fd = -1; + int err = 0, fork_map_fd = -1; FILE* stat_log = 0; struct tasktop_bpf* skel = 0; struct id_pair_t* pids = 0; struct task_cputime_t **prev_task = 0, **now_task = 0; - struct sys_cputime_t *prev_sys = 0, *now_sys = 0; + struct sys_cputime_t **prev_sys = 0, **now_sys = 0; struct record_t* rec = 0; - - prev_delay = calloc(nr_cpu, sizeof(int)); + u_int64_t i; nr_cpu = sysconf(_SC_NPROCESSORS_ONLN); @@ -926,15 +1061,30 @@ int main(int argc, char** argv) { rec = calloc(1, sizeof(struct record_t)); rec->sys.cpu = calloc(nr_cpu + 1, sizeof(struct cpu_util_t)); rec->sys.percpu_sched_delay = calloc(nr_cpu, sizeof(int)); + rec->d_tasks = calloc(env.stack_limit, sizeof(struct D_task_record_t)); + rec->cgroups = calloc(env.cgroup_limit, sizeof(cgroup_cpu_stat_t)); + if (!rec || !rec->sys.cpu || !rec->sys.percpu_sched_delay || + !rec->d_tasks || !rec->cgroups) { + err = 1; + fprintf(stderr, "Failed calloc memory\n"); + goto cleanup; + } + prev_delay = calloc(nr_cpu, sizeof(int)); pids = calloc(pidmax, sizeof(struct id_pair_t)); prev_task = calloc(pidmax, sizeof(struct task_cputime_t*)); now_task = calloc(pidmax, sizeof(struct task_cputime_t*)); - prev_sys = calloc(1 + nr_cpu, sizeof(struct sys_cputime_t)); - now_sys = calloc(1 + nr_cpu, sizeof(struct sys_cputime_t)); - if (!prev_task || !now_task) { - err = errno; - fprintf(stderr, "Failed calloc prev and now\n"); + prev_sys = calloc(1 + nr_cpu, sizeof(struct sys_cputime_t*)); + now_sys = calloc(1 + nr_cpu, sizeof(struct sys_cputime_t*)); + for (i = 0; i <= nr_cpu; i++) { + prev_sys[i] = calloc(1, sizeof(struct sys_cputime_t)); + now_sys[i] = calloc(1, sizeof(struct sys_cputime_t)); + } + + if (!prev_task || !now_task || !prev_delay || !pids || !prev_sys || + !now_sys) { + err = 1; + fprintf(stderr, "Failed calloc memory.\n"); goto cleanup; } @@ -948,13 +1098,13 @@ int main(int argc, char** argv) { skel = tasktop_bpf__open(); if (!skel) { err = 1; - fprintf(stderr, "Failed to open BPF skeleton\n"); + fprintf(stderr, "Failed to open BPF skeleton.\n"); goto cleanup; } err = tasktop_bpf__load(skel); if (err) { - fprintf(stderr, "Failed to load BPF skeleton\n"); + fprintf(stderr, "Failed to load BPF skeleton.\n"); goto cleanup; } @@ -962,34 +1112,39 @@ int main(int argc, char** argv) { err = tasktop_bpf__attach(skel); if (err) { - fprintf(stderr, "Failed to attach BPF skeleton\n"); + fprintf(stderr, "Failed to attach BPF skeleton.\n"); goto cleanup; } bool first = true; while (env.nr_iter-- && !exiting) { - read_cgroup_throttle(); + // printf("prev_sys=0x%x now_sys=0x%x\n", prev_sys, now_sys); + u_int64_t nr_thread = 0; + int rec_num = 0; + int d_num = 0; + int cgroup_num = 0; + +#ifndef ONLY_THREAD + read_cgroup_throttle(rec->cgroups, &cgroup_num); read_sched_delay(&rec->sys); check_fork(fork_map_fd, &rec->sys); runnable_proc(&rec->sys); unint_proc(&rec->sys); read_stat(prev_sys, now_sys, &rec->sys); +#endif /* get all process now */ - u_int64_t nr_thread = 0; - int rec_num = 0; - err = read_all_pids(pids, &nr_thread); if (err) { fprintf(stderr, "Failed read all pids.\n"); goto cleanup; } - printf("nr_thread=%lu\n", nr_thread); + read_d_task(pids, nr_thread, &d_num, rec->d_tasks); - read_d_task(pids, nr_thread); +#ifndef ONLY_THREAD + rec->r_tasks = calloc(nr_thread, sizeof(struct R_task_record_t*)); - rec->tasks = calloc(nr_thread, sizeof(struct task_record_t*)); /* if prev process info exist produce record*/ err = make_records(pids, nr_thread, rec, prev_task, now_task, &rec_num); if (err) { @@ -1002,7 +1157,7 @@ int main(int argc, char** argv) { /* output record */ if (!first) - output(rec, rec_num, stat_log); + output(rec, rec_num, stat_log, d_num, cgroup_num); else first = false; @@ -1011,15 +1166,15 @@ int main(int argc, char** argv) { /* update old info and free nonexist process info */ now_to_prev(pids, nr_thread, pidmax, prev_task, now_task, prev_sys, now_sys); - - if (env.nr_iter) sleep(env.delay); + // if (env.nr_iter) sleep(env.delay); + usleep(10000); +#endif } cleanup: - tasktop_bpf__destroy(skel); if (pids) free(pids); - u_int64_t i; + if (prev_task) { for (i = 0; i < pidmax; i++) { if (prev_task[i]) free(prev_task[i]); @@ -1036,5 +1191,6 @@ cleanup: if (stat_log) fclose(stat_log); + tasktop_bpf__destroy(skel); return err; } diff --git a/source/tools/detect/sched/tasktop/tasktop.h b/source/tools/detect/sched/tasktop/tasktop.h index 745daedf..fad81722 100644 --- a/source/tools/detect/sched/tasktop/tasktop.h +++ b/source/tools/detect/sched/tasktop/tasktop.h @@ -4,8 +4,9 @@ #include #include "common.h" +#define STACK_CONTENT_LEN 1024 #define FILE_PATH_LEN 256 -#define MAX_COMM_LEN 16 +#define MAX_COMM_LEN 30 #define PEROID 3 #define LIMIT 20 #define CPU_NAME_LEN 8 @@ -70,7 +71,7 @@ struct sys_cputime_t { long guest_nice; }; -struct task_record_t { +typedef struct R_task_record_t { int pid; int ppid; char comm[MAX_COMM_LEN]; @@ -79,9 +80,17 @@ struct task_record_t { double system_cpu_rate; double user_cpu_rate; double all_cpu_rate; -}; +} R_task_record_t; + +typedef struct D_task_record_t { + int pid; + int tid; + char comm[MAX_COMM_LEN]; + char stack[STACK_CONTENT_LEN]; +} D_task_record_t; typedef struct cgroup_cpu_stat_t { + char cgroup_name[MAX_COMM_LEN]; int nr_periods; int nr_throttled; unsigned long long throttled_time; @@ -112,8 +121,10 @@ typedef struct sys_record_t { } sys_record_t; struct record_t { - struct task_record_t **tasks; - struct sys_record_t sys; + R_task_record_t **r_tasks; + D_task_record_t *d_tasks; + cgroup_cpu_stat_t *cgroups; + sys_record_t sys; }; #endif \ No newline at end of file diff --git a/source/tools/detect/sched/tasktop/tasktopSelftest/test.c b/source/tools/detect/sched/tasktop/tasktopSelftest/test.c index c71f45d6..2eeea9a7 100644 --- a/source/tools/detect/sched/tasktop/tasktopSelftest/test.c +++ b/source/tools/detect/sched/tasktop/tasktopSelftest/test.c @@ -28,6 +28,8 @@ void *run_forever(void *arg) { } } +void *run_sleep(void *arg) { sleep(10000); } + void run_multithread() { pthread_t pid[128]; int i; @@ -41,6 +43,20 @@ void run_multithread() { } } +void run_multithread_sleep() { + #define THREAD_NUM 10000 + pthread_t pid[THREAD_NUM]; + int i; + for (i = 0; i < THREAD_NUM; i++) { + pthread_create(&pid[i], 0, run_sleep, 0); + // printf("fork.\n"); + } + + for (i = 0; i < THREAD_NUM; i++) { + pthread_join(pid[i], 0); + } +} + void loop_fork() { #define pnum 128 while (1) { @@ -138,6 +154,8 @@ int main(int argc, char **argv) { } else if (!strcmp(argv[1], "multi_thread")) { sleep(10); run_multithread(); + } else if (!strcmp(argv[1], "multi_thread_sleep")) { + run_multithread_sleep(); } else if (!strcmp(argv[1], "sleep")) { create_process(atoi(argv[2])); } -- Gitee From fcb00abb2e1fae003732364129ea0a05da06a078 Mon Sep 17 00:00:00 2001 From: ZouTao Date: Mon, 29 May 2023 17:14:05 +0800 Subject: [PATCH 3/5] change MAX_COMM_LEN for align --- source/tools/detect/sched/tasktop/tasktop.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/source/tools/detect/sched/tasktop/tasktop.h b/source/tools/detect/sched/tasktop/tasktop.h index fad81722..a90d9c1a 100644 --- a/source/tools/detect/sched/tasktop/tasktop.h +++ b/source/tools/detect/sched/tasktop/tasktop.h @@ -6,9 +6,7 @@ #define STACK_CONTENT_LEN 1024 #define FILE_PATH_LEN 256 -#define MAX_COMM_LEN 30 -#define PEROID 3 -#define LIMIT 20 +#define MAX_COMM_LEN 24 #define CPU_NAME_LEN 8 #define BUF_SIZE 512 #define DEBUG_LOG "./log/debug.log" -- Gitee From 31d297532f5d6858d44cc912a4822e74f02f9616 Mon Sep 17 00:00:00 2001 From: ZouTao Date: Tue, 30 May 2023 10:46:36 +0800 Subject: [PATCH 4/5] tasktop:1. fix error, read removed stat file lead to loop-forever 2. fix read_all_pids lead program exit early 3. update testcase and testdocument --- source/tools/detect/sched/tasktop/tasktop.c | 28 +- .../sched/tasktop/tasktopSelftest/test.c | 2 +- .../sched/tasktop/tasktopSelftest/test.md | 321 +++++++++++++----- 3 files changed, 247 insertions(+), 104 deletions(-) diff --git a/source/tools/detect/sched/tasktop/tasktop.c b/source/tools/detect/sched/tasktop/tasktop.c index e685782f..f8a969b3 100644 --- a/source/tools/detect/sched/tasktop/tasktop.c +++ b/source/tools/detect/sched/tasktop/tasktop.c @@ -261,25 +261,29 @@ static bool is_D(pid_t pid, pid_t tid, D_task_record_t* t_rec) { t_rec->pid = pid; memset(t_rec->comm, 0, sizeof(t_rec->comm)); - fscanf(fp, "%d %s", &t_rec->tid, t_rec->comm); + if (fscanf(fp, "%d %s", &t_rec->tid, t_rec->comm) == EOF) goto cleanup; /* process the situation comm contains space,eg. comm=(Signal Dispatch) */ while (true) { int len = strlen(t_rec->comm); if (t_rec->comm[len - 1] == ')') break; - fscanf(fp, "%s", t_rec->comm + len); + if (fscanf(fp, "%s", t_rec->comm + len) == EOF) goto cleanup; } char state; - fscanf(fp, " %c", &state); + if (fscanf(fp, " %c", &state) == EOF) goto cleanup; if (state == 'D') res = true; +cleanup: fclose(fp); return res; } static int read_stack(pid_t pid, pid_t tid, D_task_record_t* t_rec) { +#ifdef DEBUG + fprintf(stderr, "DEBUG: read_stack pid=%d tid=%d\n", pid, tid); +#endif int err = 0; char stack_path[FILE_PATH_LEN]; snprintf(stack_path, FILE_PATH_LEN, "/proc/%d/task/%d/stack", pid, tid); @@ -299,6 +303,9 @@ cleanup: static int read_d_task(struct id_pair_t* pids, int nr_thread, int* stack_num, struct D_task_record_t* d_tasks) { +#ifdef DEBUG + fprintf(stderr, "DEBUG: read_d_task\n"); +#endif int i = 0; int err = 0; @@ -375,7 +382,7 @@ static int read_cgroup_throttle(cgroup_cpu_stat_t* cgroups, int* cgroup_num) { fprintf(stderr, "Failed open %s\n", CGROUP_PATH); goto cleanup; } - + while ((dir = readdir(root_dir)) != NULL) { char name[128]; unsigned long long val = 0; @@ -503,6 +510,7 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { dir = opendir("/proc"); if (!dir) { + fprintf(stderr, "Failed open %s\n", "/proc"); err = errno; goto cleanup; } @@ -512,10 +520,9 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { !strcmp(proc_de->d_name, "..")) continue; err = parse_long(proc_de->d_name, &val); - if (err) continue; - pid = val; + pid = val; char taskpath[FILE_PATH_LEN]; snprintf(taskpath, FILE_PATH_LEN, "/proc/%d/task", pid); task_dir = opendir(taskpath); @@ -523,6 +530,8 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { if (errno == ENOENT) { continue; } + perror(taskpath); + fprintf(stderr, "Failed opendir %s\n", taskpath); err = errno; goto cleanup; } @@ -535,8 +544,7 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { if (err) { fprintf(stderr, "Failed parse tid\n"); - - goto cleanup; + break; } tid = val; @@ -1166,8 +1174,8 @@ int main(int argc, char** argv) { /* update old info and free nonexist process info */ now_to_prev(pids, nr_thread, pidmax, prev_task, now_task, prev_sys, now_sys); - // if (env.nr_iter) sleep(env.delay); - usleep(10000); + if (env.nr_iter) sleep(env.delay); + // usleep(10000); #endif } diff --git a/source/tools/detect/sched/tasktop/tasktopSelftest/test.c b/source/tools/detect/sched/tasktop/tasktopSelftest/test.c index 2eeea9a7..9be766ef 100644 --- a/source/tools/detect/sched/tasktop/tasktopSelftest/test.c +++ b/source/tools/detect/sched/tasktop/tasktopSelftest/test.c @@ -152,7 +152,7 @@ int main(int argc, char **argv) { } else if (!strcmp(argv[1], "bind")) { cpu_bind(0); } else if (!strcmp(argv[1], "multi_thread")) { - sleep(10); + sleep(30); run_multithread(); } else if (!strcmp(argv[1], "multi_thread_sleep")) { run_multithread_sleep(); diff --git a/source/tools/detect/sched/tasktop/tasktopSelftest/test.md b/source/tools/detect/sched/tasktop/tasktopSelftest/test.md index 408535dd..8f017999 100644 --- a/source/tools/detect/sched/tasktop/tasktopSelftest/test.md +++ b/source/tools/detect/sched/tasktop/tasktopSelftest/test.md @@ -47,26 +47,25 @@ #### 2.1.2 测试结果 - 2023-05-24 02:15:36 - UTIL&LOAD + [TIME-STAMP] 2023-05-30 01:37:05 + [UTIL&LOAD] usr sys iowait load1 R D fork : proc - 97.8 2.2 0.0 66.3 74 1 108 : cpuUsage.sh(3825) ppid=43661 cnt=18 - [ cpu ] usr sys iowait delay(ns) - [cpu-0] 95.3 4.7 0.0 50851273279 - [cpu-1] 96.3 3.7 0.0 57819560956 - [cpu-2] 100.0 0.0 0.0 51718093440 - [cpu-3] 99.7 0.3 0.0 51741848156 - TASKTOP + 96.4 3.5 0.0 63.2 71 1 124 : logagentctl.sh(33155) ppid=33152 cnt=6 + [PER_CPU] + cpu usr sys iowait delay(ns) + cpu-0 95.6 4.4 0.0 71161526462 + cpu-1 95.3 4.4 0.0 63930165092 + cpu-2 97.8 2.2 0.0 59050351997 + cpu-3 96.7 3.0 0.0 61105725553 + [TASKTOP] COMMAND PID PPID START RUN %UTIME %STIME %CPU - (stress) 332 325 1684894406 130 6.7 0.0 6.7 - (stress) 343 325 1684894406 130 6.7 0.0 6.7 - (stress) 328 325 1684894406 130 6.3 0.0 6.3 - (stress) 329 325 1684894406 130 6.3 0.0 6.3 - (stress) 330 325 1684894406 130 6.3 0.0 6.3 - (stress) 331 325 1684894406 130 6.3 0.0 6.3 - (stress) 326 325 1684894406 130 6.3 0.0 6.3 + (stress) 32171 32170 1685410602 23 7.0 0.0 7.0 + (stress) 32172 32170 1685410602 23 7.0 0.0 7.0 + (stress) 32173 32170 1685410602 23 7.0 0.0 7.0 + (stress) 32174 32170 1685410602 23 7.0 0.0 7.0 + (stress) 32175 32170 1685410602 23 7.0 0.0 7.0 -观察到load1迅速冲高,伴随系统以及per-cpu的cpu利用率打满,cpu时间集中于用户态,per-cpu的调度延迟达到50s。 +观察到load1迅速冲高,伴随系统以及per-cpu的cpu利用率打满,cpu时间集中于用户态,per-cpu的调度延迟达到60s。 ### 2.2 cpu绑核场景 @@ -82,24 +81,26 @@ #### 2.2.2 测试结果 - 2023-05-24 02:49:31 - UTIL&LOAD + [TIME-STAMP] 2023-05-30 01:35:43 + [UTIL&LOAD] usr sys iowait load1 R D fork : proc - 27.0 2.3 0.0 61.2 66 1 70 : walle-plugin-no(51999) ppid=100868 cnt=6 - [ cpu ] usr sys iowait delay(ns) - [cpu-0] 100.0 0.0 0.0 190259714471 - [cpu-1] 2.6 2.0 0.0 56072152 - [cpu-2] 2.7 4.3 0.0 95235540 - [cpu-3] 2.6 3.0 0.0 97010245 - TASKTOP - COMMAND PID PPID START RUN %UTIME %STIME %CPU - (telegraf) 100590 1 1684745851 150720 1.7 1.3 3.0 - (test) 48438 48433 1684896441 130 1.7 0.0 1.7 - (test) 48440 48433 1684896441 130 1.7 0.0 1.7 - (test) 48443 48433 1684896441 130 1.7 0.0 1.7 - (test) 48444 48433 1684896441 130 1.7 0.0 1.7 - (test) 48449 48433 1684896441 130 1.7 0.0 1.7 - (test) 48451 48433 1684896441 130 1.7 0.0 1.7 + 32.9 4.0 0.0 65.2 64 1 114 : cpuUsage.sh(30412) ppid=1755 cnt=15 + [PER_CPU] + cpu usr sys iowait delay(ns) + cpu-0 100.0 0.0 0.0 191098577265 + cpu-1 10.0 4.3 0.0 217842891 + cpu-2 11.0 6.6 0.0 152909467 + cpu-3 10.0 5.0 0.0 212943380 + [TASKTOP] + COMMAND PID PPID START RUN %UTIME %STIME %CPU + (node) 1816 1710 1685409611 932 16.3 1.3 17.7 + (AliYunDun) 27299 1 1685004504 406039 3.0 2.0 5.0 + (node) 1710 1697 1685409611 932 2.3 0.7 3.0 + (telegraf) 38654 1 1684980656 429887 1.7 1.3 3.0 + (test) 20079 20076 1685410217 326 1.7 0.0 1.7 + (test) 20080 20076 1685410217 326 1.7 0.0 1.7 + (test) 20081 20076 1685410217 326 1.7 0.0 1.7 + (test) 20082 20076 1685410217 326 1.7 0.0 1.7 观察到load1冲高,伴随有R状态进程数增多,但系统cpu利用率不高,cpu-0的利用率打满,cpu-0的调度延迟达到190s ### 2.3 大量fork场景 @@ -116,26 +117,50 @@ #### 2.3.2 测试结果 - 2023-05-24 03:42:18 - UTIL&LOAD - usr sys iowait load1 R D fork : proc - 57.8 36.5 0.0 28.5 43 1 16671 : test(122383) ppid=64110 cnt=16607 - [ cpu ] usr sys iowait delay(ns) - [cpu-0] 55.8 38.9 0.0 19125326036 - [cpu-1] 58.0 36.3 0.0 18447412733 - [cpu-2] 56.5 38.2 0.0 18997158534 - [cpu-3] 61.6 32.8 0.0 18552763236 - TASKTOP - COMMAND PID PPID START RUN %UTIME %STIME %CPU - (test) 122383 64110 1684899622 116 1.3 46.7 48.0 - (telegraf) 100590 1 1684745851 153887 3.7 1.7 5.3 - (uniagent) 100622 1 1684745851 153887 2.0 0.3 2.3 - (tasktop) 27523 27482 1684899160 578 0.3 0.7 1.0 - (argusagent) 102026 1 1684745875 153863 0.3 0.3 0.7 - (ksoftirqd/3) 26 2 1617171267 67728471 0.0 0.3 0.3 - (systemd) 1 0 1617171267 67728471 0.0 0.3 0.3 - (node) 43661 43620 1684891252 8486 0.0 0.3 0.3 - (dfget) 56945 1 1684899541 197 0.3 0.0 0.3 + [TIME-STAMP] 2023-05-30 01:38:51 + [UTIL&LOAD] + usr sys iowait load1 R D fork : proc + 60.5 33.6 0.0 49.6 78 1 16304 : test(34338) ppid=1893 cnt=16230 + [PER_CPU] + cpu usr sys iowait delay(ns) + cpu-0 58.2 35.3 0.0 24573513865 + cpu-1 60.5 34.0 0.0 24758192280 + cpu-2 60.9 33.2 0.0 24491136783 + cpu-3 62.3 31.8 0.0 24525447957 + [TASKTOP] + COMMAND PID PPID START RUN %UTIME %STIME %CPU + (test) 34338 1893 1685410675 56 1.3 41.0 42.3 + (telegraf) 38654 1 1684980656 430075 1.7 1.7 3.3 + (node) 1816 1710 1685409611 1120 1.7 1.0 2.7 + (tasktop) 29995 29987 1685410527 204 0.0 1.3 1.3 + (uniagent) 39728 1 1684980670 430061 1.0 0.3 1.3 + (kworker/3:3-eveIts) 50231 2 1685410706 25 0.0 1.3 1.3 + (kworker/2:0-eveIts) 27079 2 1685410443 288 0.0 1.0 1.0 + (dockerd) 1426 1 1684918991 491740 1.0 0.0 1.0 + (kworker/1:2-eveIts) 10237 2 1685409893 838 0.0 1.0 1.0 + (node) 1710 1697 1685409611 1120 0.7 0.0 0.7 + (kworker/0:2-eveIts) 32158 2 1685410601 130 0.0 0.7 0.7 + (argusagent) 39850 1 1684980673 430058 0.3 0.3 0.7 + (staragentd) 40538 1 1684980691 430040 0.0 0.7 0.7 + (systemd) 1 0 1684918982 491749 0.3 0.3 0.7 + (ilogtail) 38538 1620 1684980652 430079 0.0 0.3 0.3 + (systemd-logind)S 1159 1 1684918989 491742 0.0 0.3 0.3 + (systemd-journalS 38655 1 1684980656 430075 0.0 0.3 0.3 + (logagent) 39295 1 1684980663 430068 0.0 0.3 0.3 + (rcu_sched) 10 2 1684918982 491749 0.0 0.3 0.3 + (ksoftirqd/2) 21 2 1684918982 491749 0.0 0.3 0.3 + (walle) 40189 1 1684980677 430054 0.3 0.0 0.3 + (java) 40355 1 1684980680 430051 0.3 0.0 0.3 + (node) 1755 1710 1685409611 1120 0.0 0.3 0.3 + (docker) 43348 43343 1684980759 429972 0.0 0.3 0.3 + (dbus-daemon) 1125 1 1684918989 491742 0.3 0.0 0.3 + [D-STASK] + COMMAND PID PPID STACK + (load_calc) 141 141 [<0>] load_calc_func+0x57/0x130 + [<0>] load_calc_func+0x57/0x130 + [<0>] kthread+0xf5/0x130 + [<0>] ret_from_fork+0x1f/0x30 + [<0>] 0xffffffffffffffff 观察到load增高,同时CPU利用率也跑满,存在较多R进程但是没有被top捕捉到。此时fork增量激增,fork调用次数最多的进程为test,同时test进程的sys利用率较高。 @@ -145,7 +170,7 @@ #### 2.4.1 测试方法 -创建一个cgroup 限定cgroup的cpu额度 启动一个进程并将task的pid加入cgroup的tasks中 之后该进程创建128个线程执行计算任务 +创建一个cgroup,限定cgroup的cpu额度。启动一个进程并将task的pid加入cgroup的tasks中,之后该进程在30s后创建128个线程执行计算任务。 # 创建cgroup 设置限流30% 使用cpuset.cpus=0-3 cd /sys/fs/cgroup/cpu/ @@ -155,41 +180,150 @@ # run test ./test multi_thread + echo pid > tasks #### 2.4.2 测试结果 - [/sys/fs/cgroup/cpu/aegis/cpu.stat] nr_periods=4 nr_throttled=0 throttled_time=0 nr_burst=0 burst_time=0 - [/sys/fs/cgroup/cpu/docker/cpu.stat] nr_periods=0 nr_throttled=0 throttled_time=0 nr_burst=0 burst_time=0 - [/sys/fs/cgroup/cpu/infra.slice/cpu.stat] nr_periods=0 nr_throttled=0 throttled_time=0 nr_burst=0 burst_time=0 - [/sys/fs/cgroup/cpu/agent/cpu.stat] nr_periods=0 nr_throttled=0 throttled_time=0 nr_burst=0 burst_time=0 - [/sys/fs/cgroup/cpu/user.slice/cpu.stat] nr_periods=0 nr_throttled=0 throttled_time=0 nr_burst=0 burst_time=0 - [/sys/fs/cgroup/cpu/stress_cg/cpu.stat] nr_periods=18841 nr_throttled=18829 throttled_time=6629585264179 nr_burst=0 burst_time=0 - [/sys/fs/cgroup/cpu/system.slice/cpu.stat] nr_periods=0 nr_throttled=0 throttled_time=0 nr_burst=0 burst_time=0 - - 2023-05-24 08:20:52 - UTIL&LOAD - usr sys iowait load1 R D fork : proc - 12.6 2.2 0.0 1.2 2 1 76 : cpuUsage.sh(94513) ppid=3887 cnt=9 - [ cpu ] usr sys iowait delay(ns) - [cpu-0] 13.6 2.7 0.0 2731979911 - [cpu-1] 12.0 2.3 0.0 45323759752 - [cpu-2] 13.3 2.7 0.0 8640595296 - [cpu-3] 11.6 1.7 0.0 286465968694 - TASKTOP - COMMAND PID PPID START RUN %UTIME %STIME %CPU - (test) 20239 42752 1684914077 2375 40.0 0.3 40.3 - (AliYunDun) 5107 1 1684909031 7421 2.0 1.3 3.3 - (telegraf) 1455 1 1684908947 7505 1.7 1.3 3.0 - (uniagent) 1226 1 1684908945 7507 1.3 0.3 1.7 - (walle) 1166 1 1684908945 7507 1.0 0.0 1.0 - (node) 3887 3832 1684908988 7464 0.3 0.3 0.7 - (node) 3936 3832 1684908988 7464 0.3 0.3 0.7 - (java) 2360 1 1684908951 7501 0.0 0.3 0.3 - (logagent-collecS 2423 2347 1684908951 83 0.3 0.0 0.3 - (argusagent) 3096 1 1684908962 7490 0.0 0.3 0.3 + [TIME-STAMP] 2023-05-30 02:07:05 + [UTIL&LOAD] + usr sys iowait load1 R D fork : proc + 11.9 3.3 0.0 3.0 0 1 97 : logagentctl.sh(51049) ppid=51047 cnt=6 + [PER_CPU] + cpu usr sys iowait delay(ns) + cpu-0 11.6 3.0 0.0 320221229182 + cpu-1 12.3 3.3 0.0 2954563205 + cpu-2 12.2 3.3 0.0 5919534403 + cpu-3 11.8 3.9 0.0 30688036677 + [CGROUP] + cgroup_name nr_periods nr_throttled throttled_time nr_burst burst_time + stress_cg 2531 2523 928824820358 0 0 + [TASKTOP] + COMMAND PID PPID START RUN %UTIME %STIME %CPU + (test) 42327 1893 1685412142 283 30.0 0.0 30.0 + (AliYunDun) 7999 1 1685411022 1403 1.7 1.7 3.3 + (uniagent) 39728 1 1684980670 431755 2.0 1.0 3.0 + (telegraf) 38654 1 1684980656 431769 1.7 1.0 2.7 + (node) 1816 1710 1685409611 2814 1.7 0.7 2.3 + (tasktop) 23209 23200 1685411589 836 0.3 1.0 1.3 + (node) 1710 1697 1685409611 2814 0.7 0.3 1.0 + (walle) 40189 1 1684980677 431748 0.7 0.0 0.7 + (AliYunDunUpdateS 1793 1 1684918991 493434 0.3 0.0 0.3 + (systemd-journalS 38655 1 1684980656 431769 0.3 0.0 0.3 + (logagent) 39295 1 1684980663 431762 0.3 0.0 0.3 + (top) 27389 27304 1685411726 699 0.3 0.0 0.3 + (argusagent) 39850 1 1684980673 431752 0.3 0.0 0.3 + (ilogtail) 38538 1620 1684980652 431773 0.3 0.0 0.3 + (staragentd) 40538 1 1684980691 431734 0.0 0.3 0.3 + (dfget) 41456 1 1685412118 307 0.0 0.3 0.3 + (docker-containeS 38593 1426 1684980653 431772 0.3 0.0 0.3 + [D-STASK] + COMMAND PID PPID STACK + (load_calc) 141 141 [<0>] load_calc_func+0x57/0x130 + [<0>] load_calc_func+0x57/0x130 + [<0>] kthread+0xf5/0x130 + [<0>] ret_from_fork+0x1f/0x30 + [<0>] 0xffffffffffffffff 可以观察到此时虽然**实际负载**很高,大量task由于限流处于R状态,但是由于cgroup机制task并不位于就绪队列中,因此R状态数量指标不准确导致load1计算不准(load1无法准确体现出系统的负载情况)。但是在cgroup限流信息中可以看到stress_cg中**出现了大量的限流**,并且**per-cpu的调度延迟很高**,一定程度体现了cpu就绪队列中存在task堆积。 +### 2.5 D状态进程多场景 + +出现D状态的情况很多,最常见的是由于IO导致进入Uninterrupted Sleep状态。 + +#### 2.5.1 测试方法 + +利用stress工具开启64个进程将IO打满。 + + stress -i 64 + +#### 2.5.2 测试结果 + + [TIME-STAMP] 2023-05-30 02:37:38 + [UTIL&LOAD] + usr sys iowait load1 R D fork : proc + 5.2 72.4 17.1 15.5 42 31 76 : walle-plugin-no(34901) ppid=40189 cnt=6 + [PER_CPU] + cpu usr sys iowait delay(ns) + cpu-0 5.0 72.2 17.1 16162101481 + cpu-1 6.0 70.4 17.9 13251662817 + cpu-2 3.5 76.8 16.5 20181731711 + cpu-3 5.6 71.2 17.2 16403953008 + [CGROUP] + cgroup_name nr_periods nr_throttled throttled_time nr_burst burst_time + stress_cg 7811 7801 2868506676781 0 0 + [TASKTOP] + COMMAND PID PPID START RUN %UTIME %STIME %CPU + (telegraf) 38654 1 1684980656 433602 3.0 2.0 5.0 + (stress) 34147 34109 1685414245 13 0.0 4.7 4.7 + (stress) 34151 34109 1685414245 13 0.0 4.7 4.7 + (stress) 34159 34109 1685414245 13 0.0 4.7 4.7 + ... + (stress) 34125 34109 1685414245 13 0.0 3.7 3.7 + (stress) 34132 34109 1685414245 13 0.0 3.7 3.7 + (node) 1710 1697 1685409611 4647 2.7 0.3 3.0 + (tasktop) 23209 23200 1685411589 2669 0.3 1.0 1.3 + (jbd2/vda2-8) 606 2 1684918988 495270 0.0 1.3 1.3 + (uniagent) 39728 1 1684980670 433588 1.3 0.0 1.3 + (walle) 40189 1 1684980677 433581 1.3 0.0 1.3 + (argusagent) 39850 1 1684980673 433585 0.3 0.7 1.0 + (staragentd) 40538 1 1684980691 433567 0.3 0.7 1.0 + (ilogtail) 38538 1620 1684980652 433606 0.3 0.3 0.7 + (kworker/u8:2-flIsh- 26679 2 1685413999 259 0.0 0.3 0.3 + (kworker/u8:3-flIsh- 28798 2 1685414070 188 0.0 0.3 0.3 + (systemd-logind)S 1159 1 1684918989 495269 0.3 0.0 0.3 + (dockerd) 1426 1 1684918991 495267 0.3 0.0 0.3 + (sshd) 1609 1589 1685409610 4648 0.0 0.3 0.3 + (kworker/0:1H-kbIock 592 2 1684918983 495275 0.0 0.3 0.3 + (node) 1755 1710 1685409611 4647 0.0 0.3 0.3 + (docker-containeS 38593 1426 1684980653 433605 0.3 0.0 0.3 + (kworker/3:1H-kbIock 599 2 1684918983 495275 0.0 0.3 0.3 + (systemd-journalS 38655 1 1684980656 433602 0.0 0.3 0.3 + (logagent-collecS 39369 39295 1684980663 433595 0.3 0.0 0.3 + (kworker/u8:1-flIsh- 6901 2 1685410987 3271 0.0 0.3 0.3 + (node) 13915 1816 1685410008 4250 0.3 0.0 0.3 + (systemd) 1 0 1684918982 495276 0.3 0.0 0.3 + (dbus-daemon) 1125 1 1684918989 495269 0.3 0.0 0.3 + (kworker/u8:4-flIsh- 66011 2 1685412917 1341 0.0 0.3 0.3 + [D-STASK] + COMMAND PID PPID STACK + (load_calc) 141 141 [<0>] load_calc_func+0x57/0x130 + [<0>] load_calc_func+0x57/0x130 + [<0>] kthread+0xf5/0x130 + [<0>] ret_from_fork+0x1f/0x30 + [<0>] 0xffffffffffffffff + (jbd2/vda2-8) 606 606 [<0>] jbd2_journal_commit_transaction+0x1356/0x1b60 [jbd2] + [<0>] jbd2_journal_commit_transaction+0x1356/0x1b60 [jbd2] + [<0>] kjournald2+0xc5/0x260 [jbd2] + [<0>] kthread+0xf5/0x130 + [<0>] ret_from_fork+0x1f/0x30 + [<0>] 0xffffffffffffffff + (stress) 34110 34110 [<0>] submit_bio_wait+0x84/0xc0 + [<0>] submit_bio_wait+0x84/0xc0 + [<0>] blkdev_issue_flush+0x7c/0xb0 + [<0>] ext4_sync_fs+0x158/0x1e0 [ext4] + [<0>] sync_filesystem+0x6e/0x90 + [<0>] ovl_sync_fs+0x36/0x50 [overlay] + [<0>] iterate_supers+0xb3/0x100 + [<0>] ksys_sync+0x60/0xb0 + [<0>] __ia32_sys_sync+0xa/0x10 + [<0>] do_syscall_64+0x55/0x1a0 + [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [<0>] 0xffffffffffffffff + ... + (stress) 34131 34131 [<0>] submit_bio_wait+0x84/0xc0 + [<0>] submit_bio_wait+0x84/0xc0 + [<0>] blkdev_issue_flush+0x7c/0xb0 + [<0>] ext4_sync_fs+0x158/0x1e0 [ext4] + [<0>] sync_filesystem+0x6e/0x90 + [<0>] ovl_sync_fs+0x36/0x50 [overlay] + [<0>] iterate_supers+0xb3/0x100 + [<0>] ksys_sync+0x60/0xb0 + [<0>] __ia32_sys_sync+0xa/0x10 + [<0>] do_syscall_64+0x55/0x1a0 + [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [<0>] 0xffffffffffffffff +可以看到当前存在大量D状态task,并且系统的sys较高,并且抓取到了D-task的内核栈,主要是由stress进程在进行IO导致。 + ## 3. Tasktop性能测试 tasktop在运行时会对/proc文件系统进行遍历,采集相关信息,大量进程下可能会影响业务,因此对tasktop在不同进程数的场景下进行性能测试。 @@ -197,18 +331,19 @@ tasktop在运行时会对/proc文件系统进行遍历,采集相关信息, ### 3.1 测试方法 创建N个进程,并让这N个进程进入sleep状态,不占用CPU资源,只增加proc文件数量。 +./test sleep ### 3.2 测试结果 | Process Number | CPU Utilization | | :---------: | :---------: | | 147 | 0.3% | -| 1155 | 1.0% | -| 2157 | 1.9-2.0% | -| 4147 | 3.7-4.7% | -| 8152 | 8-11% | -| 12198 | 15.7-18% | -| 15161 | 18-21% | -| 20173 | 26.9-31.6% | - -在存在20000个进程proc文件情况下,tasktop的整体cpu资源消耗在单核的30%左右。 +| 1155 | 2.0% | +| 2157 | 4.3% | +| 4147 | 8.3% | +| 8152 | 11%-20% | +| 12198 | 24%-33% | +| 15161 | 27%-37% | +| 20173 | 30%-59% | + +在存在20000个进程proc文件情况下,tasktop的整体cpu资源平均消耗估计在单核的45%左右。 -- Gitee From 329dbfe4847bc9579c2c2728da4a8a25bc3af263 Mon Sep 17 00:00:00 2001 From: ZouTao Date: Tue, 30 May 2023 13:43:10 +0800 Subject: [PATCH 5/5] tasktop: delete some log --- source/tools/detect/sched/tasktop/tasktop.c | 17 ++++------------- .../sched/tasktop/tasktopSelftest/run_stress.sh | 7 +++++++ 2 files changed, 11 insertions(+), 13 deletions(-) create mode 100644 source/tools/detect/sched/tasktop/tasktopSelftest/run_stress.sh diff --git a/source/tools/detect/sched/tasktop/tasktop.c b/source/tools/detect/sched/tasktop/tasktop.c index f8a969b3..4dbcee50 100644 --- a/source/tools/detect/sched/tasktop/tasktop.c +++ b/source/tools/detect/sched/tasktop/tasktop.c @@ -527,13 +527,8 @@ static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { snprintf(taskpath, FILE_PATH_LEN, "/proc/%d/task", pid); task_dir = opendir(taskpath); if (!task_dir) { - if (errno == ENOENT) { - continue; - } - perror(taskpath); - fprintf(stderr, "Failed opendir %s\n", taskpath); - err = errno; - goto cleanup; + // fprintf(stderr, "Failed opendir %s\n", taskpath); + continue; } while ((task_de = readdir(task_dir)) != NULL) { @@ -1142,11 +1137,7 @@ int main(int argc, char** argv) { #endif /* get all process now */ - err = read_all_pids(pids, &nr_thread); - if (err) { - fprintf(stderr, "Failed read all pids.\n"); - goto cleanup; - } + read_all_pids(pids, &nr_thread); read_d_task(pids, nr_thread, &d_num, rec->d_tasks); @@ -1175,7 +1166,7 @@ int main(int argc, char** argv) { now_to_prev(pids, nr_thread, pidmax, prev_task, now_task, prev_sys, now_sys); if (env.nr_iter) sleep(env.delay); - // usleep(10000); + // usleep(10000); #endif } diff --git a/source/tools/detect/sched/tasktop/tasktopSelftest/run_stress.sh b/source/tools/detect/sched/tasktop/tasktopSelftest/run_stress.sh new file mode 100644 index 00000000..cdedba79 --- /dev/null +++ b/source/tools/detect/sched/tasktop/tasktopSelftest/run_stress.sh @@ -0,0 +1,7 @@ +#!bash + +for((i=0;i<10000;i++)); +do + stress -c 16 -i 16 -t 10 + sleep 1 +done -- Gitee