diff --git a/source/tools/detect/iosdiag/README.md b/source/tools/detect/iosdiag/README.md index 1d842d0c90b588bae30cf6a9d837da0ab5bbbe77..37fb81ead5eae3eeb24b1ff9417448d6a91621b3 100644 --- a/source/tools/detect/iosdiag/README.md +++ b/source/tools/detect/iosdiag/README.md @@ -190,4 +190,4 @@ more details see /var/log/sysak/iosdiag/latency/result.log* ... ] } -``` \ No newline at end of file +``` diff --git a/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py b/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py index 53279b64d813462e42f693d732bb92c8a0b3ef98..4a1410dfc723cb2ed1ceff08ac62997963846959 100755 --- a/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py +++ b/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py @@ -25,17 +25,6 @@ def humConvert(value): return "%.2f%s/s" % (value, units[i]) value = value / size -def timeConvert(value, withUnit): - units = ["us", "ms", "s"] - size = 1000.0 - for i in range(len(units)): - if (value / size) < 1: - if withUnit: - return "%.2f %s" % (value, units[i]) - else: - return "%.2f" % (value) - value = value / size - class latencyAnalysis: def __init__(self): self.delayStatDicts = {} @@ -112,9 +101,9 @@ class latencyAnalysis: diskIdx = diskIdxDicts[disk] del sDict['diskname'] listAbnormal=[i for i in sDict['abnormal'].split(' ') if i != '']; - delay=timeConvert(int(listAbnormal[-2].strip('(').split(':')[0]), False) - totalDelay=timeConvert(int(listAbnormal[-2].strip('(').split(':')[1]), True) - sDict['abnormal']=listAbnormal[0]+' '+listAbnormal[1]+" ("+delay+":"+totalDelay+")" + msDelay=int(listAbnormal[-2].strip('(').split(':')[0]) / 1000.000 + msTotalDelay=int(listAbnormal[-2].strip('(').split(':')[1]) / 1000.000 + sDict['abnormal']=listAbnormal[0]+' '+listAbnormal[1]+" ("+str(msDelay)+":"+str(msTotalDelay)+" ms)" summaryDicts['summary'][diskIdx]['slow ios'].append(sDict) def processOneLatencySeq(self, sDict): @@ -153,15 +142,6 @@ class latencyAnalysis: maxPercent = percent delayStatDicts['summary'][diskIdx]['delays'][idx]['percent'] = str(percent)+"%" - summaryDicts['summary'][diskIdx]['slow ios']=\ - sorted(summaryDicts['summary'][diskIdx]['slow ios'],\ - key=lambda e:float(re.split(':| ', e['abnormal'])[-2]),\ - reverse=True) - delayDicts['summary'][diskIdx]['slow ios']=\ - sorted(delayDicts['summary'][diskIdx]['slow ios'],\ - key=lambda e:e['totaldelay'],\ - reverse=True) - def latencyPrint(self, threshold): diskIdxDicts = self.diskIdxDicts totalIosDicts = self.totalIosDicts @@ -171,6 +151,10 @@ class latencyAnalysis: for disk, diskIdx in diskIdxDicts.items(): totalIos = totalIosDicts[disk] + summaryDicts['summary'][diskIdx]['slow ios']=\ + sorted(summaryDicts['summary'][diskIdx]['slow ios'],\ + key=lambda e:float(re.split(':| ', e['abnormal'])[-2]),\ + reverse=True) print("\n%d IOs of disk %s over %d ms, delay distribution:" %(totalIos, disk, threshold)) for component,idx in componentDicts.items(): percent = delayStatDicts['summary'][diskIdx]['delays'][idx]['percent'] @@ -178,12 +162,12 @@ class latencyAnalysis: end = totalIos if totalIos < 10 else 10 print("The first %d IOs with the largest delay, more details:" % end) - print("seq".ljust(6)+"comm".ljust(20)+"pid".ljust(10)+"iotype".ljust(8)+\ + print("time".ljust(26)+"comm".ljust(20)+"pid".ljust(10)+"iotype".ljust(8)+\ "datalen".ljust(16)+"abnormal(delay:totaldelay)".ljust(40)) for i in range(0,end): eDict=summaryDicts['summary'][diskIdx]['slow ios'][i] - print(str(eDict["seq"]).ljust(6)+eDict["comm"].ljust(20)+\ + print(str(eDict["time"]).ljust(26)+eDict["comm"].ljust(20)+\ str(eDict["pid"]).ljust(10)+eDict["iotype"].ljust(8)+\ str(eDict["datalen"]).ljust(16)+eDict["abnormal"].ljust(40)) diff --git a/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h index 85663a524de10d58c4e00974ab9c86ac5db12b2d..baf3783979d802d0d05d7669ebb8c11af8a048c4 100644 --- a/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h +++ b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h @@ -22,6 +22,12 @@ struct bpf_map_def SEC("maps") iosdiag_maps_targetdevt = { .max_entries = 1, }; +struct bpf_map_def SEC("maps") iosdiag_maps_notify = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), +}; + static inline int iosdiag_pkg_check(void *data, unsigned int len) { return 1; @@ -40,9 +46,8 @@ static unsigned int get_target_devt(void) } static void -init_iosdiag_key(unsigned int dev, unsigned long sector, struct iosdiag_key *key) +init_iosdiag_key(unsigned long sector, struct iosdiag_key *key) { - key->dev = dev; key->sector = sector; } @@ -53,22 +58,11 @@ trace_io_driver_route(struct pt_regs *ctx, struct request *req, enum ioroute_typ struct iosdiag_req new_ioreq = {0}; struct iosdiag_key key = {0}; unsigned long long now = bpf_ktime_get_ns(); - dev_t dev; sector_t sector; struct gendisk *rq_disk; - struct hd_struct *part; - struct device *device; - u32 target_devt = get_target_devt(); - - bpf_probe_read(&part, sizeof(struct hd_struct *), &req->part); - bpf_probe_read(&device, sizeof(struct device *), &part->__dev); - bpf_probe_read(&dev, sizeof(dev_t), &device->devt); - if (target_devt && dev != target_devt) - return 0; bpf_probe_read(§or, sizeof(sector_t), &req->__sector); - - init_iosdiag_key(dev, sector, &key); + init_iosdiag_key(sector, &key); ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); if (ioreq) { if (!ioreq->ts[type]) @@ -107,7 +101,7 @@ static int tracepoint_block_getrq(struct block_getrq_args *args) return 0; new_ioreq.cpu[0] = new_ioreq.cpu[1] = new_ioreq.cpu[2] = -1; - init_iosdiag_key(args->dev, args->sector, &key); + init_iosdiag_key(args->sector, &key); if (pid) memcpy(new_ioreq.comm, args->comm, sizeof(args->comm)); new_ioreq.ts[IO_START_POINT] = now; @@ -144,7 +138,7 @@ static int tracepoint_block_rq_issue(struct block_rq_issue_args *args) if (target_devt && args->dev != target_devt) return 0; - init_iosdiag_key(args->dev, args->sector, &key); + init_iosdiag_key(args->sector, &key); ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); if (ioreq) { if (ioreq->ts[type]) @@ -175,29 +169,31 @@ SEC("tracepoint/block/block_rq_complete") static int tracepoint_block_rq_complete(struct block_rq_complete_args *args) { struct iosdiag_req *ioreq; - struct iosdiag_req new_ioreq = {0}; + struct iosdiag_req data = {0}; struct iosdiag_key key = {0}; unsigned long long now = bpf_ktime_get_ns(); u32 target_devt = get_target_devt(); + int complete = 0; if (target_devt && args->dev != target_devt) return 0; - init_iosdiag_key(args->dev, args->sector, &key); + init_iosdiag_key(args->sector, &key); ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); if (ioreq) { if (!ioreq->ts[IO_COMPLETE_TIME_POINT]) ioreq->ts[IO_COMPLETE_TIME_POINT] = now; if (ioreq->ts[IO_ISSUE_DEVICE_POINT] && ioreq->ts[IO_RESPONCE_DRIVER_POINT]) - ioreq->complete = 1; + complete = 1; ioreq->cpu[2] = bpf_get_smp_processor_id(); } else return 0; - if (ioreq->complete) - bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); - else - bpf_map_delete_elem(&iosdiag_maps, &key); + if (complete) { + memcpy(&data, ioreq, sizeof(data)); + bpf_perf_event_output(args, &iosdiag_maps_notify, 0xffffffffULL, &data, sizeof(data)); + } + bpf_map_delete_elem(&iosdiag_maps, &key); return 0; } #endif diff --git a/source/tools/detect/iosdiag/latency/collect.c b/source/tools/detect/iosdiag/latency/collect.c index 48a26aed8fa7db9dd41081b97ca777b92593d011..24b355ce206e465207ed60c99998fe6d50383d51 100644 --- a/source/tools/detect/iosdiag/latency/collect.c +++ b/source/tools/detect/iosdiag/latency/collect.c @@ -29,7 +29,10 @@ DECLEAR_BPF_OBJ(iosdiag_scsi); static int iosdiag_map; static int iosdiag_virtblk_map; static int iosdiag_maps_targetdevt; +static int iosdiag_maps_notify; static int g_stop; +static int g_log_fd = -1; +static char *g_json_buf; extern unsigned long get_threshold_us(void); static int exec_shell_cmd(char *cmd) @@ -56,52 +59,47 @@ static int over_threshold(struct iosdiag_req *iop) unsigned long delay_ns = iop->ts[IO_COMPLETE_TIME_POINT] - iop->ts[IO_START_POINT]; - if (threshold_ns && delay_ns >= threshold_ns) + if (delay_ns >= threshold_ns) return 1; return 0; } -static void iosdiag_store_result(int fd) +static void iosdiag_store_result(void *ctx, int cpu, void *data, __u32 size) { - struct iosdiag_key key, next_key; - struct iosdiag_req iop; - unsigned long sleep_us = get_threshold_us() ? get_threshold_us() : 1000; - char *buf; - int i = 0; - unsigned int seq = 0; + struct iosdiag_req *iop = (struct iosdiag_req *)data; + char *buf = g_json_buf; + int fd = g_log_fd; + + if (over_threshold(iop)) { + set_check_time_date(); + summary_convert_to_json(buf, iop); + delay_convert_to_json(buf + strlen(buf), iop); + write(fd, buf, strlen(buf)); + } +} + +static void iosdiag_collect(void) +{ + struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb; + + pb_opts.sample_cb = iosdiag_store_result; + pb = perf_buffer__new(iosdiag_maps_notify, 1, &pb_opts); printf("running..."); fflush(stdout); - buf = malloc(JSON_BUFFER_SIZE); - memset(buf, 0x0, JSON_BUFFER_SIZE); - while (!g_stop) { - if (bpf_map_get_next_key(iosdiag_map, &key, &next_key) == 0) { - bpf_map_lookup_elem(iosdiag_map, &next_key, &iop); - if (iop.complete) { - if (over_threshold(&iop)) { - seq++; - set_check_time_date(); - summary_convert_to_json(buf, &iop, seq); - delay_convert_to_json(buf + strlen(buf), &iop, seq); - write(fd, buf, strlen(buf)); - } - bpf_map_delete_elem(iosdiag_map, &next_key); - } - key = next_key; - if (i++ > 50) { - usleep(sleep_us); - i = 0; - } - } else - usleep(sleep_us); - } - free(buf); + g_json_buf = malloc(JSON_BUFFER_SIZE); + memset(g_json_buf, 0x0, JSON_BUFFER_SIZE); + while (!g_stop) + perf_buffer__poll(pb, 100); + perf_buffer__free(pb); + free(g_json_buf); printf("done\n"); } static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) { - //return vfprintf(stderr, format, args); + //return vfprintf(stderr, format, args); return 0; } @@ -139,6 +137,7 @@ static void iosdiag_stop(int signo) } \ if (load_map) { \ iosdiag_map = bpf_map__fd(name->maps.iosdiag_maps); \ + iosdiag_maps_notify = bpf_map__fd(name->maps.iosdiag_maps_notify); \ iosdiag_maps_targetdevt = bpf_map__fd(name->maps.iosdiag_maps_targetdevt); \ } \ if (!__ret) \ @@ -237,7 +236,6 @@ int iosdiag_init(char *devname) int iosdiag_run(int timeout, char *output_file) { - int fd_log; char filepath[256]; char cmd[272]; @@ -249,8 +247,8 @@ int iosdiag_run(int timeout, char *output_file) strcpy(filepath, output_file); sprintf(cmd, "mkdir %s -p", dirname(filepath)); exec_shell_cmd(cmd); - fd_log = open(output_file, O_RDWR | O_CREAT, 0755); - if (fd_log < 0) { + g_log_fd = open(output_file, O_RDWR | O_CREAT, 0755); + if (g_log_fd < 0) { printf("error: create output file \"%s\" fail\n", output_file); return -1; } @@ -258,8 +256,8 @@ int iosdiag_run(int timeout, char *output_file) signal(SIGALRM, iosdiag_stop); if (timeout) alarm(timeout); - iosdiag_store_result(fd_log); - close(fd_log); + iosdiag_collect(); + close(g_log_fd); return 0; } diff --git a/source/tools/detect/iosdiag/latency/format_json.c b/source/tools/detect/iosdiag/latency/format_json.c index a01a75289fd5c018646ecff281dbd6bdf9878725..259ae9a536e8d49c45addd2162eccfdb7781fb73 100644 --- a/source/tools/detect/iosdiag/latency/format_json.c +++ b/source/tools/detect/iosdiag/latency/format_json.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "iosdiag.h" #include "format_json.h" @@ -38,7 +39,7 @@ struct ts_info g_delays[] = { {"complete", IO_COMPLETE_TIME_POINT}, }; -static char *g_check_date; +static char g_check_date[24]; static char *point_idx_to_str(int idx) { @@ -64,14 +65,19 @@ static char *delay_idx_to_str(int idx) void set_check_time_date(void) { - time_t t; - struct tm *date; - - t = time(NULL); - - date = localtime(&t); - g_check_date = asctime(date); - g_check_date[24] = '\0'; + struct timeval tv; + struct tm *p; + + gettimeofday(&tv, NULL); + p = localtime(&tv.tv_sec); + sprintf(g_check_date, "%d-%d-%d %d:%d:%d.%ld", + 1900+p->tm_year, + 1+p->tm_mon, + p->tm_mday, + p->tm_hour, + p->tm_min, + p->tm_sec, + tv.tv_usec / 1000); } static char *get_check_time_date(void) @@ -121,15 +127,15 @@ static int is_disk_delay(struct iosdiag_req *iop) return 1; } -void point_convert_to_json(void *dest, void *src, unsigned int seq) +void point_convert_to_json(void *dest, void *src) { int i; struct iosdiag_req *iop = src; sprintf(dest, - "{\"seq\":\"%u\"," + "{\"time\":\"%s\"," "\"diskname\":\"%s\"," - "\"points\":[", seq, iop->diskname); + "\"points\":[", get_check_time_date(), iop->diskname); for (i = 0; i < MAX_POINT; i++) { if (!iop->ts[i]) continue; @@ -142,7 +148,7 @@ void point_convert_to_json(void *dest, void *src, unsigned int seq) sprintf(dest + strlen(dest), "%s", "]}\n"); } -void delay_convert_to_json(void *dest, void *src, unsigned int seq) +void delay_convert_to_json(void *dest, void *src) { int i, n; int skip = 0; @@ -150,9 +156,9 @@ void delay_convert_to_json(void *dest, void *src, unsigned int seq) struct iosdiag_req *iop = src; sprintf(dest, - "{\"seq\":\"%u\"," + "{\"time\":\"%s\"," "\"diskname\":\"%s\",", - seq, + get_check_time_date(), iop->diskname); for (i = 0, n = 0; i < MAX_POINT; i++) { if (i == IO_START_POINT) { @@ -183,7 +189,7 @@ void delay_convert_to_json(void *dest, void *src, unsigned int seq) sprintf(dest + strlen(dest), "%s", "]}\n"); } -void summary_convert_to_json(void *dest, void *src, unsigned int seq) +void summary_convert_to_json(void *dest, void *src) { char cpu[24] = {0}; char component[16] = {0}; @@ -204,8 +210,7 @@ void summary_convert_to_json(void *dest, void *src, unsigned int seq) iop->cpu[0], iop->cpu[1], iop->cpu[2]); //blk_rq_op_name(iop->cmd_flags, buf, sizeof(buf)); sprintf(dest, - "{\"seq\":\"%u\"," - "\"time\":\"%s\"," + "{\"time\":\"%s\"," "\"abnormal\":\"%s delay (%lu:%lu us)\"," "\"diskname\":\"%s\"," "\"iotype\":\"%s\"," @@ -214,7 +219,6 @@ void summary_convert_to_json(void *dest, void *src, unsigned int seq) "\"comm\":\"%s\"," "\"pid\":%d," "\"cpu\":\"%s\"}\n", - seq, get_check_time_date(), maxdelay_component, max_delay, diff --git a/source/tools/detect/iosdiag/latency/format_json.h b/source/tools/detect/iosdiag/latency/format_json.h index 63de88df212bd2e0f5f308fceb062569bef368e6..ece2b8c9a234aacc646cf605a7a165dc787f4968 100644 --- a/source/tools/detect/iosdiag/latency/format_json.h +++ b/source/tools/detect/iosdiag/latency/format_json.h @@ -3,8 +3,8 @@ #define JSON_BUFFER_SIZE 4096 void set_check_time_date(void); -void summary_convert_to_json(void *dest, void *src, unsigned int seq); -void delay_convert_to_json(void *dest, void *src, unsigned int seq); -void point_convert_to_json(void *dest, void *src, unsigned int seq); +void summary_convert_to_json(void *dest, void *src); +void delay_convert_to_json(void *dest, void *src); +void point_convert_to_json(void *dest, void *src); #endif diff --git a/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c b/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c index 1f801e14b45085d3392bb9dfd029c4ba98860754..cd1909210cd2b25b574ad21601090cf2c93830fc 100644 --- a/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c +++ b/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c @@ -4,7 +4,7 @@ struct bpf_map_def SEC("maps") iosdiag_virtblk_maps = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(pid_t), .value_size = sizeof(unsigned long), - .max_entries = 20480, + .max_entries = 2048, }; SEC("kprobe/virtio_queue_rq")