diff --git a/0023-blkparse-Fix-up-the-sector-and-length-of-split-compl.patch b/0023-blkparse-Fix-up-the-sector-and-length-of-split-compl.patch new file mode 100644 index 0000000000000000000000000000000000000000..19df826472ca4a4bf9f1ac6be0433ba32c832c44 --- /dev/null +++ b/0023-blkparse-Fix-up-the-sector-and-length-of-split-compl.patch @@ -0,0 +1,300 @@ +From e81829a565162d41e2f551dc5e0f74f22f319554 Mon Sep 17 00:00:00 2001 +From: Andreas Gruenbacher +Date: Mon, 13 Apr 2020 21:01:52 +0200 +Subject: [PATCH] blkparse: Fix up the sector and length of split completions + +When a split io completes, the sector and length of the completion event refer +to the last part of the original request. This is in conflict with the +blkparse manual page, makes the blkparse output difficult to read, and leads to +incorrect statistics. Fix up the sector and length of split completion events +to match the original request. + +To achieve that, slightly extend the existing event tracking infrastructure to +track all parts of a split request. We could almost get by tracking only the +last part of a split, but that wouldn't quite work correctly for splits of +splits. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Jens Axboe +--- + blkparse.c | 136 +++++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 102 insertions(+), 34 deletions(-) + +diff --git a/blkparse.c b/blkparse.c +index fcc0ccd..796059b 100644 +--- a/blkparse.c ++++ b/blkparse.c +@@ -262,17 +262,21 @@ static struct trace *t_alloc_list; + /* + * for tracking individual ios + */ +-struct io_track { +- struct rb_node rb_node; +- ++struct io_track_req { + struct process_pid_map *ppm; +- __u64 sector; + unsigned long long allocation_time; + unsigned long long queue_time; + unsigned long long dispatch_time; + unsigned long long completion_time; + }; + ++struct io_track { ++ struct rb_node rb_node; ++ struct io_track_req *req; ++ struct io_track *next; ++ __u64 sector; ++}; ++ + static int ndevices; + static struct per_dev_info *devices; + static char *get_dev_name(struct per_dev_info *, char *, int); +@@ -992,6 +996,11 @@ static struct io_track *__find_track(struct per_dev_info *pdi, __u64 sector) + return NULL; + } + ++static inline struct io_track *first_iot(struct io_track_req *req) ++{ ++ return (struct io_track *)(req + 1); ++} ++ + static struct io_track *find_track(struct per_dev_info *pdi, pid_t pid, + __u64 sector) + { +@@ -999,15 +1008,20 @@ static struct io_track *find_track(struct per_dev_info *pdi, pid_t pid, + + iot = __find_track(pdi, sector); + if (!iot) { +- iot = malloc(sizeof(*iot)); +- iot->ppm = find_ppm(pid); +- if (!iot->ppm) +- iot->ppm = add_ppm_hash(pid, "unknown"); ++ struct io_track_req *req; ++ ++ req = malloc(sizeof(*req) + sizeof(*iot)); ++ req->ppm = find_ppm(pid); ++ if (!req->ppm) ++ req->ppm = add_ppm_hash(pid, "unknown"); ++ req->allocation_time = -1ULL; ++ req->queue_time = -1ULL; ++ req->dispatch_time = -1ULL; ++ req->completion_time = -1ULL; ++ iot = first_iot(req); ++ iot->req = req; ++ iot->next = NULL; + iot->sector = sector; +- iot->allocation_time = -1ULL; +- iot->queue_time = -1ULL; +- iot->dispatch_time = -1ULL; +- iot->completion_time = -1ULL; + track_rb_insert(pdi, iot); + } + +@@ -1039,14 +1053,16 @@ static void log_track_frontmerge(struct per_dev_info *pdi, + static void log_track_getrq(struct per_dev_info *pdi, struct blk_io_trace *t) + { + struct io_track *iot; ++ struct io_track_req *req; + + if (!track_ios) + return; + + iot = find_track(pdi, t->pid, t->sector); +- io_warn_unless(t, iot->allocation_time == -1ULL, ++ req = iot->req; ++ io_warn_unless(t, req->allocation_time == -1ULL, + "confused about %s time", "allocation"); +- iot->allocation_time = t->time; ++ req->allocation_time = t->time; + } + + /* +@@ -1056,14 +1072,34 @@ static void log_track_getrq(struct per_dev_info *pdi, struct blk_io_trace *t) + static void log_track_queue(struct per_dev_info *pdi, struct blk_io_trace *t) + { + struct io_track *iot; ++ struct io_track_req *req; + + if (!track_ios) + return; + + iot = find_track(pdi, t->pid, t->sector); +- io_warn_unless(t, iot->dispatch_time == -1ULL, ++ req = iot->req; ++ io_warn_unless(t, req->dispatch_time == -1ULL, + "confused about %s time", "dispatch"); +- iot->dispatch_time = t->time; ++ req->dispatch_time = t->time; ++} ++ ++static void log_track_split(struct per_dev_info *pdi, struct blk_io_trace *t) ++{ ++ struct io_track *iot, *split; ++ ++ /* ++ * With a split request, the completion event will refer to the last ++ * part of the original request, but other events might refer to other ++ * parts. ++ */ ++ iot = find_track(pdi, t->pid, t->sector); ++ split = malloc(sizeof(*iot)); ++ split->req = iot->req; ++ split->next = iot->next; ++ iot->next = split; ++ split->sector = iot->sector + t_sec(t); ++ track_rb_insert(pdi, split); + } + + /* +@@ -1074,22 +1110,24 @@ static unsigned long long log_track_insert(struct per_dev_info *pdi, + { + unsigned long long elapsed; + struct io_track *iot; ++ struct io_track_req *req; + + if (!track_ios) + return -1; + + iot = find_track(pdi, t->pid, t->sector); +- io_warn_unless(t, iot->queue_time == -1ULL, ++ req = iot->req; ++ io_warn_unless(t, req->queue_time == -1ULL, + "confused about %s time", "queue"); +- iot->queue_time = t->time; ++ req->queue_time = t->time; + +- if (iot->allocation_time == -1ULL) ++ if (req->allocation_time == -1ULL) + return -1; + +- elapsed = iot->queue_time - iot->allocation_time; ++ elapsed = req->queue_time - req->allocation_time; + + if (per_process_stats) { +- struct per_process_info *ppi = find_ppi(iot->ppm->pid); ++ struct per_process_info *ppi = find_ppi(req->ppm->pid); + int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; + + if (ppi && elapsed > ppi->longest_allocation_wait[w]) +@@ -1107,6 +1145,7 @@ static unsigned long long log_track_issue(struct per_dev_info *pdi, + { + unsigned long long elapsed = -1ULL; + struct io_track *iot; ++ struct io_track_req *req; + + if (!track_ios) + return -1; +@@ -1122,14 +1161,15 @@ static unsigned long long log_track_issue(struct per_dev_info *pdi, + return -1; + } + +- io_warn_unless(t, iot->dispatch_time == -1ULL, ++ req = iot->req; ++ io_warn_unless(t, req->dispatch_time == -1ULL, + "confused about %s time", "dispatch"); +- iot->dispatch_time = t->time; +- if (iot->queue_time != -1ULL) +- elapsed = iot->dispatch_time - iot->queue_time; ++ req->dispatch_time = t->time; ++ if (req->queue_time != -1ULL) ++ elapsed = req->dispatch_time - req->queue_time; + + if (elapsed != -1ULL && per_process_stats) { +- struct per_process_info *ppi = find_ppi(iot->ppm->pid); ++ struct per_process_info *ppi = find_ppi(req->ppm->pid); + int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; + + if (ppi && elapsed > ppi->longest_dispatch_wait[w]) +@@ -1139,6 +1179,25 @@ static unsigned long long log_track_issue(struct per_dev_info *pdi, + return elapsed; + } + ++static void fixup_complete(struct per_dev_info *pdi, struct blk_io_trace *t) ++{ ++ struct io_track *iot; ++ __u64 start_sector; ++ ++ iot = __find_track(pdi, t->sector); ++ if (!iot) ++ return; ++ ++ /* ++ * When a split io completes, the sector and length of the completion ++ * refer to the last part of the original request. Fix the sector and ++ * length of the complete event to match the original request. ++ */ ++ start_sector = first_iot(iot->req)->sector; ++ t->bytes += (t->sector - start_sector) << 9; ++ t->sector = start_sector; ++} ++ + /* + * return time between dispatch and complete + */ +@@ -1146,7 +1205,8 @@ static unsigned long long log_track_complete(struct per_dev_info *pdi, + struct blk_io_trace *t) + { + unsigned long long elapsed = -1ULL; +- struct io_track *iot; ++ struct io_track *iot, *next; ++ struct io_track_req *req; + + if (!track_ios) + return -1; +@@ -1160,14 +1220,15 @@ static unsigned long long log_track_complete(struct per_dev_info *pdi, + return -1; + } + +- io_warn_unless(t, iot->completion_time == -1ULL, ++ req = iot->req; ++ io_warn_unless(t, req->completion_time == -1ULL, + "confused about %s time", "completion"); +- iot->completion_time = t->time; +- if (iot->dispatch_time != -1ULL) +- elapsed = iot->completion_time - iot->dispatch_time; ++ req->completion_time = t->time; ++ if (req->dispatch_time != -1ULL) ++ elapsed = req->completion_time - req->dispatch_time; + + if (elapsed != -1ULL && per_process_stats) { +- struct per_process_info *ppi = find_ppi(iot->ppm->pid); ++ struct per_process_info *ppi = find_ppi(req->ppm->pid); + int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; + + if (ppi && elapsed > ppi->longest_completion_wait[w]) +@@ -1177,8 +1238,13 @@ static unsigned long long log_track_complete(struct per_dev_info *pdi, + /* + * kill the trace, we don't need it after completion + */ +- rb_erase(&iot->rb_node, &pdi->rb_track); +- free(iot); ++ for (iot = first_iot(req); iot; iot = next) { ++ next = iot->next; ++ rb_erase(&iot->rb_node, &pdi->rb_track); ++ if (iot != first_iot(req)) ++ free(iot); ++ } ++ free(req); + + return elapsed; + } +@@ -1629,6 +1695,7 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi, + case __BLK_TA_COMPLETE: + if (pdi->cur_depth[w]) + pdi->cur_depth[w]--; ++ fixup_complete(pdi, t); + account_c(t, pci, w, t->bytes); + log_complete(pdi, pci, t, "C"); + break; +@@ -1644,6 +1711,7 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi, + log_unplug(pci, t, "UT"); + break; + case __BLK_TA_SPLIT: ++ log_track_split(pdi, t); + log_split(pci, t, "X"); + break; + case __BLK_TA_BOUNCE: +-- +2.43.0 + diff --git a/blktrace.spec b/blktrace.spec index 03645e1bdb1847fd771042321a401585c03c3420..c33fd9f6feae2b04601412cdba218c56ceb9dbc2 100644 --- a/blktrace.spec +++ b/blktrace.spec @@ -1,6 +1,6 @@ Name: blktrace Version: 1.2.0 -Release: 27 +Release: 28 Summary: Block IO tracer in the Linux kernel License: GPLv2+ Source: http://brick.kernel.dk/snaps/blktrace-%{version}.tar.bz2 @@ -33,6 +33,7 @@ Patch19: 0019-fix-parallel-build-of-btt-and-blkiomon.patch Patch20: 0020-blkparse-skip-check_cpu_map-with-pipe-input.patch Patch21: 0021-blkparse-fix-incorrectly-sized-memset-in-check_cpu_m.patch Patch22: 0022-fix-hang-when-BLKTRACESETUP-fails-and-o-is-used.patch +Patch23: 0023-blkparse-Fix-up-the-sector-and-length-of-split-compl.patch %description blktrace is a block layer IO tracing mechanism which provides detailed @@ -67,6 +68,9 @@ make dest=%{buildroot} prefix=%{buildroot}/%{_prefix} mandir=%{buildroot}/%{_man %{_mandir}/man8/* %changelog +* Thu Mar 20 2025 kouwenqi - 1.2.0-28 +- blkparse: Fix up the sector and length of split completions + * Tue Sep 24 2024 wangxiao - 1.2.0-27 - DESC: backport upstream patches blkparse skip check_cpu_map with pipe input