From 7ca975798c09d239710be47c328166206f7a42d9 Mon Sep 17 00:00:00 2001 From: "guangshui.li" Date: Thu, 23 Dec 2021 17:41:08 +0800 Subject: [PATCH 01/10] iosdiag: Add iosdiag tools Signed-off-by: guangshui.li --- source/tools/detect/iosdiag/Makefile | 3 + source/tools/detect/iosdiag/README.md | 157 ++++++++++ .../detect/iosdiag/data_analysis/Makefile | 5 + .../data_analysis/iosdiag_data_analysis.py | 240 +++++++++++++++ source/tools/detect/iosdiag/entry/Makefile | 3 + source/tools/detect/iosdiag/entry/iosdiag.sh | 124 ++++++++ source/tools/detect/iosdiag/latency/Makefile | 9 + .../iosdiag/latency/bpf_iosdiag_common.h | 203 +++++++++++++ source/tools/detect/iosdiag/latency/collect.c | 275 ++++++++++++++++++ .../tools/detect/iosdiag/latency/ebpf_load.h | 8 + .../detect/iosdiag/latency/format_json.c | 230 +++++++++++++++ .../detect/iosdiag/latency/format_json.h | 10 + source/tools/detect/iosdiag/latency/iosdiag.h | 47 +++ .../detect/iosdiag/latency/iosdiag_nvme.bpf.c | 77 +++++ .../detect/iosdiag/latency/iosdiag_scsi.bpf.c | 31 ++ .../iosdiag/latency/iosdiag_virtblk.bpf.c | 61 ++++ source/tools/detect/iosdiag/latency/main.c | 72 +++++ 17 files changed, 1555 insertions(+) create mode 100644 source/tools/detect/iosdiag/Makefile create mode 100644 source/tools/detect/iosdiag/README.md create mode 100644 source/tools/detect/iosdiag/data_analysis/Makefile create mode 100755 source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py create mode 100644 source/tools/detect/iosdiag/entry/Makefile create mode 100755 source/tools/detect/iosdiag/entry/iosdiag.sh create mode 100644 source/tools/detect/iosdiag/latency/Makefile create mode 100644 source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h create mode 100644 source/tools/detect/iosdiag/latency/collect.c create mode 100644 source/tools/detect/iosdiag/latency/ebpf_load.h create mode 100644 source/tools/detect/iosdiag/latency/format_json.c create mode 100644 source/tools/detect/iosdiag/latency/format_json.h create mode 100644 source/tools/detect/iosdiag/latency/iosdiag.h create mode 100644 source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c create mode 100644 source/tools/detect/iosdiag/latency/iosdiag_scsi.bpf.c create mode 100644 source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c create mode 100644 source/tools/detect/iosdiag/latency/main.c diff --git a/source/tools/detect/iosdiag/Makefile b/source/tools/detect/iosdiag/Makefile new file mode 100644 index 00000000..4d1f7df8 --- /dev/null +++ b/source/tools/detect/iosdiag/Makefile @@ -0,0 +1,3 @@ +target := iosdiag + +include $(SRC)/mk/sub.mk diff --git a/source/tools/detect/iosdiag/README.md b/source/tools/detect/iosdiag/README.md new file mode 100644 index 00000000..727457b4 --- /dev/null +++ b/source/tools/detect/iosdiag/README.md @@ -0,0 +1,157 @@ +# iosdiag +iosdiag (IO storage diagnostics tools), IO存储诊断工具,目前包括已实现的IO延迟诊断功能以及正在实现的IO夯诊断或其他功能等等 + +# 代码目录结构 +entry ------- IO诊断功能入口代码 +latency ------- IO延迟诊断代码 +data_analysis ------- IO诊断数据分析与结果输出代码 + +# 运行IO延迟诊断功能前置条件 +由于基于eBPF实现,因此需要内核支持并启动eBPF + +# 编译 +在编译sysak的之前,需要在执行configure配置的时候加上--enable-libbpf --enable-target-iosdiag才能编译进sysak + +# 使用 +## 参数说明 +###sysak iosdiag -h +Usage: sysak iosdiag [options] subcmd [cmdargs]] + subcmd: + latency, 执行io延迟诊断功能 + cmdargs: + -h, 跟在子命令之后显示功能支持参数 + options: + -u url, 指定url,将会通过curl命令把诊断日志文件打包上传到此url,不指定不上传 + -s latency, 停止诊断 + +###sysak iosdiag latency -h +Usage: latency [OPTION] disk_devname + +options: + -t threshold, 指定超时IO的时间阈值(单位ms),IO时延诊断将过滤完成耗时超过此阈值的IO(默认1000ms) + -T time, 指定诊断运行时长(单位秒)后自动退出(默认10秒) + +e.g. + latency vda 诊断访问磁盘vda上耗时1000ms的IO,诊断10s后自动退出 + latency -t 10 vda 诊断访问磁盘vda上耗时10ms的IO,诊断10s后自动退出 + latency -t 10 -T 30 vda 诊断访问磁盘vda上耗时10ms的IO,诊断30s后自动退出 + +## 输出说明 +### 控制台输出 +#### 运行命令: +sysak iosdiag latency -t 1 -T 20 vda +#### 运行过程日志: +start iosdiag_virtblk load bpf +load iosdiag_virtblk bpf success +running...done +#### 运行结果输出:两个维度,整体IO的延迟分布情况(以百分比展示整体IO的延迟点)+输出延迟最大的前TOPn个IO的最大延迟点以及总延迟 +15 IOs of disk vda over 1 ms, delay distribution: +os(block) delay: 17.147% +os(driver) delay: 0.009% +disk delay: 82.84% +os(complete) delay: 0.002% +The first 10 IOs with the largest delay, more details: +seq comm pid iotype datalen abnormal(delay:totaldelay) +11 kworker/u12:2 11943 W 4096 disk delay (145.56:256.88 ms) +12 kworker/u12:2 11943 W 4096 disk delay (145.46:256.66 ms) +15 kworker/u12:2 11943 W 4096 disk delay (217.39:217.51 ms) +14 jbd2/vda1-8 354 FWFS 4096 os(block) delay (143.42:152.93 ms) +13 kworker/u12:2 11943 W 4096 disk delay (145.05:145.30 ms) +3 kworker/u12:2 11943 W 4096 disk delay (113.80:114.00 ms) +5 kworker/u12:2 11943 W 8192 disk delay (112.97:113.14 ms) +1 kworker/u12:2 11943 W 4096 disk delay (111.79:111.96 ms) +10 kworker/u12:2 11943 W 8192 disk delay (111.62:111.78 ms) +4 kworker/u12:2 11943 W 4096 disk delay (111.11:111.30 ms) +more details see /var/log/sysak/iosdiag/latency/result.log* + +### 日志文件说明 +日志文件中的数据均以磁盘为单位以json数组的方式呈现 +#### /var/log/sysak/iosdiag/latency/result.log +该日志文件描述的是每一个延迟IO的事件信息,通过seq可以从result.log.seq文件中索引到IO的延迟信息 +{ + "summary":[ //以磁盘为一个单位的数组 + { + "diskname":"vda", //磁盘盘符 + "slow ios":[ //每一个该磁盘下的io为一个单位的数组 + { + "seq":"11", //通过这个序号可以从result.log.seq中找到此IO的延迟分布 + "time":"Thu Dec 23 14:42:10 2021", //检测到次超时IO的时间 + "abnormal":"disk delay (145.56:256.88 ms)", //此IO的延迟最大的点(延迟最大的组件的延迟:总延迟) + "iotype":"W", //此IO类型 + "sector":23695488, //此IO访问磁盘的具体偏移位置 + "datalen":4096, //次IO访问磁盘的数据量 + "comm":"kworker/u12:2", //发起此IO的进程 + "pid":11943, //进程ID + "cpu":"2 -> 4 -> 4" //发起此IO的CPU -> 响应IO完成之后磁盘中断的CPU -> 磁盘IO完成后执行软中断的CPU + //如只显示一个CPU编号,说明发起IO和执行中断的CPU相同,要注意也有磁盘是没有软中断流程的 + }, + {第二个IO事件信息}, + ... + ] + }, + {第二个磁盘}, + ... + ] +} +#### /var/log/sysak/iosdiag/latency/result.log.seq +该日志文件描述的是每一个延迟IO在各组建的延迟分布,通过seq可以从result.log文件中索引到IO的详细信息 +{ + "summary":[ //以磁盘为一个单位的数组 + { + "diskname":"vda", //磁盘盘符 + "slow ios":[ //每一个该磁盘下的io延迟信息为一个单位的数组 + { + "seq":"11", //通过这个序号可以从result.log中找到此IO的详细信息 + "totaldelay":256884, //此IO总耗时 + "delays":[ //以此IO的每个组件的延迟情况为单位的数组,目前涉及的组建为:block、driver、disk、complete + { + "component":"block", //组建名 + "delay":111300 //此IO在该组建的时延,单位us + }, + { + "component":"driver", + "delay":25 + }, + { + "component":"disk", + "delay":145557 + }, + { + "component":"complete", + "delay":2 + } + ] + }, + {第二个IO延迟信息}, + ... + ] + }, + {第二个磁盘}, + ... + ] +} +#### /var/log/sysak/iosdiag/latency/result.log.stat +该日志文件描述的是在磁盘角度,所有IO的延迟分布统计信息 +{ + "summary":[ //以磁盘为一个单位的数组 + { + "diskname":"vda", //磁盘盘符 + "delays":[ //以每个组件的延迟情况为单位的数组,目前涉及的组建为:block、driver、disk、complete + { + "component":"os(block)", //组建名 + "percent":"17.147%", //在捕获的该磁盘的IO中,经统计在此组件耗时的百分比 + "max":143422, //在此组件的最大耗时,单位us + "min":76, //在此组件的最小耗时,单位us + "avg":24518 //在此组件的平均耗时,单位us + }, + {"os(driver)" 延迟情况}, + {"disk" 延迟情况}, + { "os(complete)" 延迟情况} + ] + }, + {第二个磁盘}, + ... + ] +} + + diff --git a/source/tools/detect/iosdiag/data_analysis/Makefile b/source/tools/detect/iosdiag/data_analysis/Makefile new file mode 100644 index 00000000..e2dba024 --- /dev/null +++ b/source/tools/detect/iosdiag/data_analysis/Makefile @@ -0,0 +1,5 @@ +target = iosdiag_data_analysis + +mods += $(target) + +include $(SRC)/mk/py.mk diff --git a/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py b/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py new file mode 100755 index 00000000..53279b64 --- /dev/null +++ b/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py @@ -0,0 +1,240 @@ +# -*- coding: utf-8 -*- + +import os +import json +import string +from collections import OrderedDict +import argparse +import re + +if os.geteuid() != 0: + print "This program must be run as root. Aborting." + sys.exit(0) + +def execCmd(cmd): + r = os.popen(cmd) + text = r.read() + r.close() + return text + +def humConvert(value): + units = ["B", "KB", "MB", "GB", "TB", "PB"] + size = 1024.0 + for i in range(len(units)): + if (value / size) < 1: + return "%.2f%s/s" % (value, units[i]) + value = value / size + +def timeConvert(value, withUnit): + units = ["us", "ms", "s"] + size = 1000.0 + for i in range(len(units)): + if (value / size) < 1: + if withUnit: + return "%.2f %s" % (value, units[i]) + else: + return "%.2f" % (value) + value = value / size + +class latencyAnalysis: + def __init__(self): + self.delayStatDicts = {} + self.delayDicts = {} + self.summaryDicts = {} + self.totalIosDicts = {} + self.totalDelayDicts = {} + self.diskIdxDicts = {} + self.totalDiskCnt = 0 + self.threshold = 0 + self.componentDicts = OrderedDict([('os(block)',0),('os(driver)',1),\ + ('disk',2),('os(complete)',3)]) + self.delayStatJsonStr = \ + '{ \ + "diskname":"","delays":[ \ + {"component":"os(block)","percent":"","max":0,"min":1000000000,"avg":0},\ + {"component":"os(driver)","percent":"","max":0,"min":1000000000,"avg":0},\ + {"component":"disk","percent":"","max":0,"min":1000000000,"avg":0}, \ + {"component":"os(complete)","percent":"","max":0,"min":1000000000,"avg":0}]\ + }' + newDelayStatDict = json.loads("["+self.delayStatJsonStr + "]", object_pairs_hook=OrderedDict) + self.delayStatDicts.setdefault('summary', newDelayStatDict) + self.entryDictJsonStr = \ + '{ \ + "diskname":"",\ + "slow ios":[] \ + }' + newSummaryDict = json.loads("["+self.entryDictJsonStr + "]", object_pairs_hook=OrderedDict) + self.summaryDicts.setdefault('summary', newSummaryDict) + newDelayDict = json.loads("["+self.entryDictJsonStr + "]", object_pairs_hook=OrderedDict) + self.delayDicts.setdefault('summary', newDelayDict) + + def __newDiskDict(self, disk): + if self.totalDiskCnt != 0: + newDelayStatDict = json.loads(self.delayStatJsonStr, object_pairs_hook=OrderedDict) + self.delayStatDicts['summary'].append(newDelayStatDict) + newSummaryDict = json.loads(self.entryDictJsonStr, object_pairs_hook=OrderedDict) + self.summaryDicts['summary'].append(newSummaryDict) + newDelayDict = json.loads(self.entryDictJsonStr, object_pairs_hook=OrderedDict) + self.delayDicts['summary'].append(newDelayDict) + self.delayStatDicts['summary'][self.totalDiskCnt]['diskname'] = disk + self.summaryDicts['summary'][self.totalDiskCnt]['diskname'] = disk + self.delayDicts['summary'][self.totalDiskCnt]['diskname'] = disk + self.totalDelayDicts.setdefault(disk, 0) + self.totalIosDicts.setdefault(disk, 0) + self.diskIdxDicts.setdefault(disk, self.totalDiskCnt) + self.totalDiskCnt += 1; + + def processLatencyDelays(self, sDict): + diskIdxDicts = self.diskIdxDicts + totalDelayDicts = self.totalDelayDicts + componentDicts = self.componentDicts + delayStatDicts = self.delayStatDicts + delayDicts = self.delayDicts + + disk = sDict['diskname'] + del sDict['diskname'] + totalDelayDicts[disk] += sDict['totaldelay'] + diskIdx = diskIdxDicts[disk] + delayDicts['summary'][diskIdx]['slow ios'].append(sDict) + for component,idx in componentDicts.items(): + delay = sDict['delays'][idx]['delay'] + if delay > delayStatDicts['summary'][diskIdx]['delays'][idx]['max']: + delayStatDicts['summary'][diskIdx]['delays'][idx]['max'] = delay + if delay < delayStatDicts['summary'][diskIdx]['delays'][idx]['min']: + delayStatDicts['summary'][diskIdx]['delays'][idx]['min'] = delay + delayStatDicts['summary'][diskIdx]['delays'][idx]['avg'] += delay + + def processLatencySummary(self, sDict): + diskIdxDicts = self.diskIdxDicts + summaryDicts = self.summaryDicts + + disk = sDict['diskname'] + diskIdx = diskIdxDicts[disk] + del sDict['diskname'] + listAbnormal=[i for i in sDict['abnormal'].split(' ') if i != '']; + delay=timeConvert(int(listAbnormal[-2].strip('(').split(':')[0]), False) + totalDelay=timeConvert(int(listAbnormal[-2].strip('(').split(':')[1]), True) + sDict['abnormal']=listAbnormal[0]+' '+listAbnormal[1]+" ("+delay+":"+totalDelay+")" + summaryDicts['summary'][diskIdx]['slow ios'].append(sDict) + + def processOneLatencySeq(self, sDict): + totalIosDicts = self.totalIosDicts + + disk = sDict['diskname'] + if disk not in totalIosDicts.keys(): + self.__newDiskDict(disk) + + totalIosDicts[disk] += 1 + if "abnormal" in sDict: + self.processLatencySummary(sDict) + else: + self.processLatencyDelays(sDict) + + def latencyCalculate(self): + diskIdxDicts = self.diskIdxDicts + totalIosDicts = self.totalIosDicts + totalDelayDicts = self.totalDelayDicts + componentDicts = self.componentDicts + delayStatDicts = self.delayStatDicts + summaryDicts = self.summaryDicts + delayDicts = self.delayDicts + + for disk, diskIdx in diskIdxDicts.items(): + totalIosDicts[disk] /= 2 + totalIos = totalIosDicts[disk] + maxPercent = 0 + avgTotalDelay = totalDelayDicts[disk] / totalIos + for component,idx in componentDicts.items(): + delayStatDicts['summary'][diskIdx]['delays'][idx]['avg'] /= totalIos + avgDelay = delayStatDicts['summary'][diskIdx]['delays'][idx]['avg'] + #percent = avgDelay * 100.0 / avgTotalDelay + percent = round((avgDelay * 100.0 / avgTotalDelay), 3) + if percent > maxPercent: + maxPercent = percent + delayStatDicts['summary'][diskIdx]['delays'][idx]['percent'] = str(percent)+"%" + + summaryDicts['summary'][diskIdx]['slow ios']=\ + sorted(summaryDicts['summary'][diskIdx]['slow ios'],\ + key=lambda e:float(re.split(':| ', e['abnormal'])[-2]),\ + reverse=True) + delayDicts['summary'][diskIdx]['slow ios']=\ + sorted(delayDicts['summary'][diskIdx]['slow ios'],\ + key=lambda e:e['totaldelay'],\ + reverse=True) + + def latencyPrint(self, threshold): + diskIdxDicts = self.diskIdxDicts + totalIosDicts = self.totalIosDicts + summaryDicts = self.summaryDicts + delayStatDicts = self.delayStatDicts + componentDicts = self.componentDicts + + for disk, diskIdx in diskIdxDicts.items(): + totalIos = totalIosDicts[disk] + print("\n%d IOs of disk %s over %d ms, delay distribution:" %(totalIos, disk, threshold)) + for component,idx in componentDicts.items(): + percent = delayStatDicts['summary'][diskIdx]['delays'][idx]['percent'] + print("%-12s delay: %s" %(component, percent)) + + end = totalIos if totalIos < 10 else 10 + print("The first %d IOs with the largest delay, more details:" % end) + print("seq".ljust(6)+"comm".ljust(20)+"pid".ljust(10)+"iotype".ljust(8)+\ + "datalen".ljust(16)+"abnormal(delay:totaldelay)".ljust(40)) + + for i in range(0,end): + eDict=summaryDicts['summary'][diskIdx]['slow ios'][i] + print(str(eDict["seq"]).ljust(6)+eDict["comm"].ljust(20)+\ + str(eDict["pid"]).ljust(10)+eDict["iotype"].ljust(8)+\ + str(eDict["datalen"]).ljust(16)+eDict["abnormal"].ljust(40)) + +def latencyDataAnalysis(resultSeqFile, threshold): + analysis = latencyAnalysis() + f = open(resultSeqFile) + for line in f.readlines(): + try: + sDict = json.loads(line, object_pairs_hook=OrderedDict) + except ValueError: + continue + analysis.processOneLatencySeq(sDict) + f.close() + if analysis.totalDiskCnt == 0: + print("\n0 IOs over %d ms, everything is ok !^o^ ~" % int(threshold)) + return + analysis.latencyCalculate() + + f = open(resultSeqFile.strip('.seq')+".stat", 'w+') + f.write(json.dumps(analysis.delayStatDicts)) + f.close() + + f = open(resultSeqFile.strip('.seq'), 'w+') + f.write(json.dumps(analysis.summaryDicts)) + f.close() + + f = open(resultSeqFile, 'w+') + f.write(json.dumps(analysis.delayDicts)) + f.close() + analysis.latencyPrint(int(threshold)) + print("more details see %s*" % resultSeqFile.strip('.seq')) + +def main(): + examples = """e.g. + ./iosdiag_data_analysis.py -L -s -t 1000 -f ./result.log.seq //Statistic IO delay diagnosis results + ./iosdiag_data_analysis.py -L -g -t 1000 -f ./result.log.seq //Display IO delay diagnostic results graphically + """ + parser = argparse.ArgumentParser( + description="Analyze IO diagnostic data.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) + parser.add_argument('-L','--latency', action='store_true', help='Analyze IO delay diagnostic data.') + parser.add_argument('-s','--stat', action='store_true', help='Statistic IO diagnosis results.') + parser.add_argument('-g','--graph', action='store_true', help='Display IO diagnostic results graphically.') + parser.add_argument('-t','--threshold', help='Specifies the threshold for the exception io.') + parser.add_argument('-f','--file', help='Specify the target data file to analyze.') + args = parser.parse_args() + + if args.latency: + latencyDataAnalysis(args.file, 1000 if args.threshold == None else args.threshold) + +if __name__ == "__main__": + main() + diff --git a/source/tools/detect/iosdiag/entry/Makefile b/source/tools/detect/iosdiag/entry/Makefile new file mode 100644 index 00000000..f2f54ff8 --- /dev/null +++ b/source/tools/detect/iosdiag/entry/Makefile @@ -0,0 +1,3 @@ +target = iosdiag + +include $(SRC)/mk/sh.mk diff --git a/source/tools/detect/iosdiag/entry/iosdiag.sh b/source/tools/detect/iosdiag/entry/iosdiag.sh new file mode 100755 index 00000000..10e442c5 --- /dev/null +++ b/source/tools/detect/iosdiag/entry/iosdiag.sh @@ -0,0 +1,124 @@ +#!/bin/sh +#****************************************************************# +# ScriptName: iosdiag.sh +# Author: guangshui.lgs@alibaba-inc.com +# Create Date: 2021-07-02 11:44 +# Modify Author: $SHTERM_REAL_USER@alibaba-inc.com +# Modify Date: 2021-07-02 11:45 +# Function: +#***************************************************************# +if [ "$SYSAK_WORK_PATH" != "" ]; then +WORK_PATH=$SYSAK_WORK_PATH +else +WORK_PATH=/usr/local/sbin/.sysak_compoents +fi +TOOLS_PATH=$WORK_PATH/tools/`uname -r` +LIB_PATH=$WORK_PATH/lib/`uname -r` +latency_bin=$WORK_PATH/tools/latency +data_analysis=$WORK_PATH/tools/iosdiag_data_analysis +iosdiag_dir="/var/log/sysak/iosdiag" +logfile="$iosdiag_dir/$1/result.log.seq" +logfile_arg="-f $logfile" + +function usage() { + echo "Usage: sysak iosdiag [options] subcmd [cmdargs]]" + echo " subcmd:" + echo " latency, io latency diagnosis" + echo " cmdargs:" + echo " -h, help info" + echo " options:" + echo " -u url, transfer datafile to remote url" + echo " -s latency|[..], stop diagnosis" + #echo " -f logfile, output log file" +} + +function list() { + ls $TOOLS_PATH +} + +upload_data() { + datapath=$iosdiag_dir/$1 + cd $datapath + tar -zcf iosdiag_$1.tar.gz ./* + curl -i -q -X PUT -T iosdiag_$1.tar.gz $url + rm -f iosdiag_$1.tar.gz +} + +datafile_analysis() { + python $data_analysis --$1 --stat --file $logfile $threshold_arg +} + +enable_latency() { + if [ ! -e "$latency_bin" ]; then + echo "$latency_bin not found" + echo "iosdiag latency not support '$(uname -r)', please report to the developer" + exit -1 + fi + threshold=$(echo "$*"|awk -F "-t" '{print $2}'|awk '{print $1}') + [ "$threshold" != "" ] && { threshold_arg="-t $threshold"; } + { + flock -n 3 + [ $? -eq 1 ] && { echo "another latency is running."; exit -1; } + trap disable_latency SIGINT SIGTERM SIGQUIT + #mkdir -p `dirname $datafile` + chmod +x $latency_bin + rm $logfile_arg + $SYSAK_WORK_PATH/../sysak btf + $latency_bin $logfile_arg $* & + wait $! + disable_latency + } 3<> /tmp/latency.lock +} + +disable_latency() { + pid=`ps -ef | grep "\$latency_bin" | awk '{print $2}'` + if [ "$pid" != "" ] + then + kill -9 $pid 2>/dev/null + fi + + datafile_analysis latency + if [ -n "$url" ]; then + upload_data latency + fi + exit 0 +} + + +#execute command,every command need such args: +# -h/--help: command usage +# -f/--file: output files, default stdout +# output format jason +# -d/--disable +function execute() { + #echo cmd:$1 ${*:2} + enable_$1 ${*:2} +} + +while getopts 'hs:u:' OPT; do + case $OPT in + "u") + url=$OPTARG + ;; + "s") + diag_stop=true + subcmd=$OPTARG + ;; + *) + usage + exit 0 + ;; + esac +done + +if [ $diag_stop ]; then + echo "disable $subcmd" + disable_$subcmd + exit 0 +fi + +subcmd=${@:$OPTIND:1} +subargs=${*:$OPTIND+1}; +[ "$subcmd" != "latency" ] && { echo "not support subcmd $subcmd!!!"; usage; exit -1; } +execute $subcmd $subargs + diff --git a/source/tools/detect/iosdiag/latency/Makefile b/source/tools/detect/iosdiag/latency/Makefile new file mode 100644 index 00000000..7ad4403c --- /dev/null +++ b/source/tools/detect/iosdiag/latency/Makefile @@ -0,0 +1,9 @@ +newdirs += . +csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c)) +bpfsrcs += $(wildcard *.bpf.c) + +CFLAGS += -static + +target = latency + +include $(SRC)/mk/bpf.mk diff --git a/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h new file mode 100644 index 00000000..85663a52 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h @@ -0,0 +1,203 @@ +#ifndef __BPF_IOSDIAG_COMMON_H +#define __BPF_IOSDIAG_COMMON_H + +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct bpf_map_def SEC("maps") iosdiag_maps = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct iosdiag_key), + .value_size = sizeof(struct iosdiag_req), + .max_entries = 2048, +}; + +struct bpf_map_def SEC("maps") iosdiag_maps_targetdevt = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1, +}; + +static inline int iosdiag_pkg_check(void *data, unsigned int len) +{ + return 1; +} + +static unsigned int get_target_devt(void) +{ + unsigned int key = 0; + unsigned int *devt; + + devt = (unsigned int *)bpf_map_lookup_elem(&iosdiag_maps_targetdevt, + &key); + if (devt) + return *devt; + return 0; +} + +static void +init_iosdiag_key(unsigned int dev, unsigned long sector, struct iosdiag_key *key) +{ + key->dev = dev; + key->sector = sector; +} + +static inline int +trace_io_driver_route(struct pt_regs *ctx, struct request *req, enum ioroute_type type) +{ + struct iosdiag_req *ioreq; + struct iosdiag_req new_ioreq = {0}; + struct iosdiag_key key = {0}; + unsigned long long now = bpf_ktime_get_ns(); + dev_t dev; + sector_t sector; + struct gendisk *rq_disk; + struct hd_struct *part; + struct device *device; + u32 target_devt = get_target_devt(); + + bpf_probe_read(&part, sizeof(struct hd_struct *), &req->part); + bpf_probe_read(&device, sizeof(struct device *), &part->__dev); + bpf_probe_read(&dev, sizeof(dev_t), &device->devt); + if (target_devt && dev != target_devt) + return 0; + + bpf_probe_read(§or, sizeof(sector_t), &req->__sector); + + init_iosdiag_key(dev, sector, &key); + ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); + if (ioreq) { + if (!ioreq->ts[type]) + ioreq->ts[type] = now; + if (ioreq->diskname[0] == '\0') { + bpf_probe_read(&rq_disk, sizeof(struct gendisk *), &req->rq_disk); + bpf_probe_read(ioreq->diskname, sizeof(ioreq->diskname), &rq_disk->disk_name); + } + if (type == IO_RESPONCE_DRIVER_POINT) + ioreq->cpu[1] = bpf_get_smp_processor_id(); + } else + return 0; + bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); + return 0; +} + +struct block_getrq_args { + struct trace_entry ent; + unsigned int dev; + unsigned long sector; + unsigned int nr_sector; + char rwbs[8]; + char comm[16]; +}; + +SEC("tracepoint/block/block_getrq") +static int tracepoint_block_getrq(struct block_getrq_args *args) +{ + struct iosdiag_req new_ioreq = {0}; + struct iosdiag_key key = {0}; + unsigned long long now = bpf_ktime_get_ns(); + pid_t pid = bpf_get_current_pid_tgid(); + u32 target_devt = get_target_devt(); + + if (target_devt && args->dev != target_devt) + return 0; + + new_ioreq.cpu[0] = new_ioreq.cpu[1] = new_ioreq.cpu[2] = -1; + init_iosdiag_key(args->dev, args->sector, &key); + if (pid) + memcpy(new_ioreq.comm, args->comm, sizeof(args->comm)); + new_ioreq.ts[IO_START_POINT] = now; + new_ioreq.pid = pid; + memcpy(new_ioreq.op, args->rwbs, sizeof(args->rwbs)); + new_ioreq.sector = args->sector; + new_ioreq.data_len = args->nr_sector * 512; + new_ioreq.cpu[0] = bpf_get_smp_processor_id(); + bpf_map_update_elem(&iosdiag_maps, &key, &new_ioreq, BPF_ANY); + return 0; +} + +struct block_rq_issue_args { + struct trace_entry ent; + unsigned int dev; + unsigned long sector; + unsigned int nr_sector; + unsigned int bytes; + char rwbs[8]; + char comm[16]; + char cmd[0]; +}; + +SEC("tracepoint/block/block_rq_issue") +static int tracepoint_block_rq_issue(struct block_rq_issue_args *args) +{ + struct iosdiag_req *ioreq; + struct iosdiag_key key = {0}; + unsigned long long now = bpf_ktime_get_ns(); + pid_t pid = bpf_get_current_pid_tgid(); + int type = IO_ISSUE_DRIVER_POINT; + u32 target_devt = get_target_devt(); + + if (target_devt && args->dev != target_devt) + return 0; + + init_iosdiag_key(args->dev, args->sector, &key); + ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); + if (ioreq) { + if (ioreq->ts[type]) + type = IO_ISSUE_DEVICE_POINT; + ioreq->ts[type] = now; + + if (args->bytes) + ioreq->data_len = args->bytes; + else if (args->nr_sector) + ioreq->data_len = args->nr_sector * 512; + } else + return 0; + bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); + return 0; +} + +struct block_rq_complete_args { + struct trace_entry ent; + dev_t dev; + sector_t sector; + unsigned int nr_sector; + int errors; + char rwbs[8]; + char cmd[0]; +}; + +SEC("tracepoint/block/block_rq_complete") +static int tracepoint_block_rq_complete(struct block_rq_complete_args *args) +{ + struct iosdiag_req *ioreq; + struct iosdiag_req new_ioreq = {0}; + struct iosdiag_key key = {0}; + unsigned long long now = bpf_ktime_get_ns(); + u32 target_devt = get_target_devt(); + + if (target_devt && args->dev != target_devt) + return 0; + + init_iosdiag_key(args->dev, args->sector, &key); + ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); + if (ioreq) { + if (!ioreq->ts[IO_COMPLETE_TIME_POINT]) + ioreq->ts[IO_COMPLETE_TIME_POINT] = now; + if (ioreq->ts[IO_ISSUE_DEVICE_POINT] && + ioreq->ts[IO_RESPONCE_DRIVER_POINT]) + ioreq->complete = 1; + ioreq->cpu[2] = bpf_get_smp_processor_id(); + } else + return 0; + if (ioreq->complete) + bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); + else + bpf_map_delete_elem(&iosdiag_maps, &key); + return 0; +} +#endif diff --git a/source/tools/detect/iosdiag/latency/collect.c b/source/tools/detect/iosdiag/latency/collect.c new file mode 100644 index 00000000..48a26aed --- /dev/null +++ b/source/tools/detect/iosdiag/latency/collect.c @@ -0,0 +1,275 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ebpf_load.h" +#include "iosdiag.h" +#include "format_json.h" +#include + +#define min(x, y) ((x) > (y) ? (y) : (x)) + +#define DECLEAR_BPF_OBJ(name) \ + static struct name##_bpf *name; \ + static int name##_bpf_load; \ + +DECLEAR_BPF_OBJ(iosdiag_virtblk); +DECLEAR_BPF_OBJ(iosdiag_nvme); +DECLEAR_BPF_OBJ(iosdiag_scsi); +static int iosdiag_map; +static int iosdiag_virtblk_map; +static int iosdiag_maps_targetdevt; +static int g_stop; + +extern unsigned long get_threshold_us(void); +static int exec_shell_cmd(char *cmd) +{ + char buf[64]; + FILE *fp; + + if (!cmd) + return -1; + + if ((fp = popen(cmd, "r")) == NULL) { + fprintf(stderr, "exec \'%s\' fail\n", cmd); + return -1; + } + + while (fgets(buf, sizeof(buf) - 1, fp)); + pclose(fp); + return 0; +} + +static int over_threshold(struct iosdiag_req *iop) +{ + unsigned long threshold_ns = get_threshold_us() * 1000; + unsigned long delay_ns = iop->ts[IO_COMPLETE_TIME_POINT] - + iop->ts[IO_START_POINT]; + + if (threshold_ns && delay_ns >= threshold_ns) + return 1; + return 0; +} + +static void iosdiag_store_result(int fd) +{ + struct iosdiag_key key, next_key; + struct iosdiag_req iop; + unsigned long sleep_us = get_threshold_us() ? get_threshold_us() : 1000; + char *buf; + int i = 0; + unsigned int seq = 0; + + printf("running..."); + fflush(stdout); + buf = malloc(JSON_BUFFER_SIZE); + memset(buf, 0x0, JSON_BUFFER_SIZE); + while (!g_stop) { + if (bpf_map_get_next_key(iosdiag_map, &key, &next_key) == 0) { + bpf_map_lookup_elem(iosdiag_map, &next_key, &iop); + if (iop.complete) { + if (over_threshold(&iop)) { + seq++; + set_check_time_date(); + summary_convert_to_json(buf, &iop, seq); + delay_convert_to_json(buf + strlen(buf), &iop, seq); + write(fd, buf, strlen(buf)); + } + bpf_map_delete_elem(iosdiag_map, &next_key); + } + key = next_key; + if (i++ > 50) { + usleep(sleep_us); + i = 0; + } + } else + usleep(sleep_us); + } + free(buf); + printf("done\n"); +} + +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + //return vfprintf(stderr, format, args); + return 0; +} + +static void iosdiag_stop(int signo) +{ + //printf("iosdiag stop!\n"); + g_stop = 1; +} + +#define LOAD_IOSDIAG_BPF(name, load_map) \ +({ \ + __label__ out; \ + int __ret = 0; \ + printf("start %s load bpf\n", #name); \ + name = name##_bpf__open(); \ + if (!name) { \ + printf("load bpf error\n"); \ + printf("load %s bpf fail\n", #name); \ + __ret = -1; \ + goto out; \ + } \ + if (name##_bpf__load(name)) { \ + printf("load bpf prog error\n"); \ + printf("load %s bpf fail\n", #name); \ + name##_bpf__destroy(name); \ + __ret = -1; \ + goto out; \ + } \ + if (name##_bpf__attach(name)) { \ + printf("attach bpf prog error\n"); \ + printf("load %s bpf fail\n", #name); \ + name##_bpf__destroy(name); \ + __ret = -1; \ + goto out; \ + } \ + if (load_map) { \ + iosdiag_map = bpf_map__fd(name->maps.iosdiag_maps); \ + iosdiag_maps_targetdevt = bpf_map__fd(name->maps.iosdiag_maps_targetdevt); \ + } \ + if (!__ret) \ + printf("load %s bpf success\n", #name); \ + name##_bpf_load = 1; \ +out: \ + __ret; \ +}) + +static unsigned int get_devt_by_devname(char *devname) +{ + char sys_file[64]; + char cmd[128]; + char dev[16]; + FILE *fp; + int major, minor; + + sprintf(sys_file, "/sys/block/%s/dev", devname); + if (access(sys_file, F_OK)) + sprintf(sys_file, "/sys/block/*/%s/../dev", devname); + + sprintf(cmd, "cat %s 2>/dev/null", sys_file); + if ((fp = popen(cmd, "r")) == NULL) { + fprintf(stderr, "exec \'%s\' fail\n", cmd); + return 0; + } + + while (fgets(dev, sizeof(dev) - 1, fp)) { + if (sscanf(dev, "%d:%d", &major, &minor) != 2) { + pclose(fp); + return 0; + } + } + pclose(fp); + return ((major << 20) | minor); +} + +static char *get_module_name_by_devname(char *devname) +{ + char sys_file[64] = {0}; + char file_path[PATH_MAX] = {0}; + int ret; + + sprintf(sys_file, "/sys/class/block/%s", devname); + ret = readlink(sys_file, file_path, PATH_MAX); + if (ret < 0 || ret >= PATH_MAX) + return "none"; + if (strstr(file_path, "virtio")) + return "virtblk"; + else if (strstr(file_path, "nvme")) + return "nvme"; + else if (strstr(file_path, "target")) + return "scsi"; + return "none"; +} + +int iosdiag_init(char *devname) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int key = 0; + unsigned int target_devt = get_devt_by_devname(devname); + char *module_name = get_module_name_by_devname(devname); + + setrlimit(RLIMIT_MEMLOCK, &r); + + libbpf_set_print(libbpf_print_fn); + if (!strcmp(module_name, "virtblk")) { + if (LOAD_IOSDIAG_BPF(iosdiag_virtblk, 1)) + return -1; + } else if (!strcmp(module_name, "nvme")) { + if (LOAD_IOSDIAG_BPF(iosdiag_nvme, 1)) + return -1; + } else if (!strcmp(module_name, "scsi")) { + if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 1)) + return -1; + } else { + if (LOAD_IOSDIAG_BPF(iosdiag_virtblk, 1)) { + if (LOAD_IOSDIAG_BPF(iosdiag_nvme, 1)) { + if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 1)) + return -1; + } else { + LOAD_IOSDIAG_BPF(iosdiag_scsi, 0); + } + } else { + LOAD_IOSDIAG_BPF(iosdiag_nvme, 0); + LOAD_IOSDIAG_BPF(iosdiag_scsi, 0); + } + } + if (iosdiag_virtblk_bpf_load) + iosdiag_virtblk_map = + bpf_map__fd(iosdiag_virtblk->maps.iosdiag_virtblk_maps); + if (target_devt) + bpf_map_update_elem(iosdiag_maps_targetdevt, &key, &target_devt, BPF_ANY); + return 0; +} + +int iosdiag_run(int timeout, char *output_file) +{ + int fd_log; + char filepath[256]; + char cmd[272]; + + if (strlen(output_file) > (sizeof(filepath) - 1)) { + printf("error: output file name(%s) too large(max %lu bytes)\n", + output_file, sizeof(filepath)); + return -1; + } + strcpy(filepath, output_file); + sprintf(cmd, "mkdir %s -p", dirname(filepath)); + exec_shell_cmd(cmd); + fd_log = open(output_file, O_RDWR | O_CREAT, 0755); + if (fd_log < 0) { + printf("error: create output file \"%s\" fail\n", output_file); + return -1; + } + signal(SIGINT, iosdiag_stop); + signal(SIGALRM, iosdiag_stop); + if (timeout) + alarm(timeout); + iosdiag_store_result(fd_log); + close(fd_log); + return 0; +} + +void iosdiag_exit(char *module_name) +{ + if (iosdiag_virtblk_bpf_load) + iosdiag_virtblk_bpf__destroy(iosdiag_virtblk); + if (iosdiag_nvme_bpf_load) + iosdiag_nvme_bpf__destroy(iosdiag_nvme); + if (iosdiag_scsi_bpf_load) + iosdiag_scsi_bpf__destroy(iosdiag_scsi); +} + diff --git a/source/tools/detect/iosdiag/latency/ebpf_load.h b/source/tools/detect/iosdiag/latency/ebpf_load.h new file mode 100644 index 00000000..8face9a5 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/ebpf_load.h @@ -0,0 +1,8 @@ +#ifndef _EBPF_LOAD_H +#define _EBPF_LOAD_H +#include +#include +#include "iosdiag_virtblk.skel.h" +#include "iosdiag_nvme.skel.h" +#include "iosdiag_scsi.skel.h" +#endif diff --git a/source/tools/detect/iosdiag/latency/format_json.c b/source/tools/detect/iosdiag/latency/format_json.c new file mode 100644 index 00000000..a01a7528 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/format_json.c @@ -0,0 +1,230 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" +#include "format_json.h" +#include + +#define min(x, y) ((x) > (y) ? (y) : (x)) +struct ts_info { + char *str; + int idx; +}; + +struct ts_info g_points[] = { + {"start", IO_START_POINT}, + {"issue_driver", IO_ISSUE_DRIVER_POINT}, + {"issue_device", IO_ISSUE_DEVICE_POINT}, + {"device_complete", IO_RESPONCE_DRIVER_POINT}, + {"complete", IO_COMPLETE_TIME_POINT}, +}; + +struct ts_info g_delays[] = { + //{"total", IO_START_POINT}, + {"block", IO_ISSUE_DRIVER_POINT}, + {"driver", IO_ISSUE_DEVICE_POINT}, + {"disk", IO_RESPONCE_DRIVER_POINT}, + {"complete", IO_COMPLETE_TIME_POINT}, +}; + +static char *g_check_date; + +static char *point_idx_to_str(int idx) +{ + int i = 0; + + for (; i < (sizeof(g_points) / sizeof(g_points[0])); i++) { + if (g_points[i].idx == idx) + return g_points[i].str; + } + return NULL; +} + +static char *delay_idx_to_str(int idx) +{ + int i = 0; + + for (; i < (sizeof(g_delays) / sizeof(g_delays[0])); i++) { + if (g_delays[i].idx == idx) + return g_delays[i].str; + } + return NULL; +} + +void set_check_time_date(void) +{ + time_t t; + struct tm *date; + + t = time(NULL); + + date = localtime(&t); + g_check_date = asctime(date); + g_check_date[24] = '\0'; +} + +static char *get_check_time_date(void) +{ + return g_check_date; +} + +static unsigned long get_total_delay(struct iosdiag_req *iop) +{ + return iop->ts[MAX_POINT - 1] / 1000 - iop->ts[IO_START_POINT] / 1000; +} + +static unsigned long get_max_delay(struct iosdiag_req *iop) +{ + int i; + unsigned long delay; + unsigned long max_delay = 0; + + for (i = IO_START_POINT + 1; i < MAX_POINT; i++) { + delay = iop->ts[i] / 1000 - iop->ts[i - 1] / 1000; + if (max_delay < delay) + max_delay = delay; + } + return max_delay; +} + +static char *get_max_delay_component(struct iosdiag_req *iop) +{ + int i, idx = 0; + unsigned long delay; + unsigned long max_delay = 0; + + for (i = IO_START_POINT + 1; i < MAX_POINT; i++) { + delay = iop->ts[i] / 1000 - iop->ts[i - 1] / 1000; + if (max_delay < delay) { + max_delay = delay; + idx = i; + } + } + return idx != 0 ? delay_idx_to_str(idx) : ""; +} + +static int is_disk_delay(struct iosdiag_req *iop) +{ + if (strcmp(get_max_delay_component(iop), "disk")) + return 0; + return 1; +} + +void point_convert_to_json(void *dest, void *src, unsigned int seq) +{ + int i; + struct iosdiag_req *iop = src; + + sprintf(dest, + "{\"seq\":\"%u\"," + "\"diskname\":\"%s\"," + "\"points\":[", seq, iop->diskname); + for (i = 0; i < MAX_POINT; i++) { + if (!iop->ts[i]) + continue; + sprintf(dest + strlen(dest), + "{\"point\":\"%s\",\"ts\":%llu}", + point_idx_to_str(i), (iop->ts[i] / 1000)); + if (i != (MAX_POINT - 1)) + sprintf(dest + strlen(dest), "%s", ","); + } + sprintf(dest + strlen(dest), "%s", "]}\n"); +} + +void delay_convert_to_json(void *dest, void *src, unsigned int seq) +{ + int i, n; + int skip = 0; + unsigned long delay; + struct iosdiag_req *iop = src; + + sprintf(dest, + "{\"seq\":\"%u\"," + "\"diskname\":\"%s\",", + seq, + iop->diskname); + for (i = 0, n = 0; i < MAX_POINT; i++) { + if (i == IO_START_POINT) { + delay = iop->ts[MAX_POINT - 1] / 1000 - + iop->ts[IO_START_POINT] / 1000; + sprintf(dest + strlen(dest), + "\"totaldelay\":%lu," + "\"delays\":[", + delay); + continue; + } else { + if (!skip) + n = i - 1; + if (iop->ts[i] > iop->ts[n]) { + delay = iop->ts[i] / 1000 - iop->ts[n] / 1000; + skip = 0; + } else { + skip = 1; + continue; + } + } + sprintf(dest + strlen(dest), + "{\"component\":\"%s\",\"delay\":%lu}", + delay_idx_to_str(i), delay); + if (i != (MAX_POINT - 1)) + sprintf(dest + strlen(dest), "%s", ","); + } + sprintf(dest + strlen(dest), "%s", "]}\n"); +} + +void summary_convert_to_json(void *dest, void *src, unsigned int seq) +{ + char cpu[24] = {0}; + char component[16] = {0}; + struct iosdiag_req *iop = src; + char *maxdelay_component = get_max_delay_component(iop); + unsigned long max_delay = get_max_delay(iop); + unsigned long total_delay = get_total_delay(iop); + + if (!is_disk_delay(iop)) { + sprintf(component, "os(%s)", maxdelay_component); + maxdelay_component = component; + } + + if (iop->cpu[0] == iop->cpu[1] && iop->cpu[1] == iop->cpu[2]) + sprintf(cpu, "%d", iop->cpu[0]); + else + sprintf(cpu, "%d -> %d -> %d", + iop->cpu[0], iop->cpu[1], iop->cpu[2]); + //blk_rq_op_name(iop->cmd_flags, buf, sizeof(buf)); + sprintf(dest, + "{\"seq\":\"%u\"," + "\"time\":\"%s\"," + "\"abnormal\":\"%s delay (%lu:%lu us)\"," + "\"diskname\":\"%s\"," + "\"iotype\":\"%s\"," + "\"sector\":%lu," + "\"datalen\":%u," + "\"comm\":\"%s\"," + "\"pid\":%d," + "\"cpu\":\"%s\"}\n", + seq, + get_check_time_date(), + maxdelay_component, + max_delay, + total_delay, + iop->diskname, + iop->op, + iop->sector, + iop->data_len, + iop->comm, + iop->pid, + cpu); +} + diff --git a/source/tools/detect/iosdiag/latency/format_json.h b/source/tools/detect/iosdiag/latency/format_json.h new file mode 100644 index 00000000..63de88df --- /dev/null +++ b/source/tools/detect/iosdiag/latency/format_json.h @@ -0,0 +1,10 @@ +#ifndef _FORMAT_JSON_H +#define _FORMAT_JSON_H + +#define JSON_BUFFER_SIZE 4096 +void set_check_time_date(void); +void summary_convert_to_json(void *dest, void *src, unsigned int seq); +void delay_convert_to_json(void *dest, void *src, unsigned int seq); +void point_convert_to_json(void *dest, void *src, unsigned int seq); +#endif + diff --git a/source/tools/detect/iosdiag/latency/iosdiag.h b/source/tools/detect/iosdiag/latency/iosdiag.h new file mode 100644 index 00000000..cada8db4 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/iosdiag.h @@ -0,0 +1,47 @@ +#ifndef __IOSDIAG__ +#define __IOSDIAG__ + +#define COUNT (10) +#define BPF_ANY (0) +#define IOSDIAG_PKG_MAGIC 0x494F5049 + +#define REQ_OP_BITS 8 +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define MAX_STACK_DEPTH 12 + +enum ioroute_type{ + IO_START_POINT, + IO_ISSUE_DRIVER_POINT, + IO_ISSUE_DEVICE_POINT, + IO_RESPONCE_DRIVER_POINT, + IO_COMPLETE_TIME_POINT, + MAX_POINT, +}; + +struct iosdiag_req { + pid_t pid; + char comm[16]; + char diskname[32]; + unsigned long long ts[MAX_POINT]; + unsigned int cpu[3]; + unsigned int complete; + //unsigned int cmd_flags; + char op[8]; + unsigned int data_len; + unsigned long sector; +}; + +struct iosdiag_key { +#if 0 + int cpu; + unsigned long long start_time_ns; +// unsigned long long io_start_time_ns; +#endif + unsigned int dev; + unsigned long sector; +}; + +int iosdiag_init(char *module_name); +int iosdiag_run(int timeout, char *output_file); +void iosdiag_exit(char *module_name); +#endif diff --git a/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c b/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c new file mode 100644 index 00000000..38c149c4 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c @@ -0,0 +1,77 @@ +#include "bpf_iosdiag_common.h" + +#if 1 +SEC("kprobe/nvme_queue_rq") +int kprobe_nvme_queue_rq(struct pt_regs *ctx) +{ + struct blk_mq_queue_data *bd = + (struct blk_mq_queue_data *)PT_REGS_PARM2(ctx); + bool kick; + struct request *req; + + bpf_probe_read(&kick, sizeof(bool), &bd->last); + if (!kick) + return 0; + + bpf_probe_read(&req, sizeof(struct request *), &bd->rq); + if (!req) { + bpf_printk("kprobe_nvme_queue_rq: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_ISSUE_DRIVER_POINT); +} + +#else +static struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) +{ + unsigned int nr_tags; + struct request *rqs, *rq; + + bpf_probe_read(&nr_tags, sizeof(unsigned int), &tags->nr_tags); + if (tag < nr_tags) { + bpf_probe_read(&rqs, sizeof(struct request *), &tags->rqs); + bpf_probe_read(&rq, sizeof(struct request *), + rqs + sizeof(struct request *) * tag); + return rq; + } + return NULL; +} + +SEC("kprobe/nvme_submit_cmd") +int kprobe_nvme_submit_cmd(struct pt_regs *ctx) +{ + struct nvme_queue *nvmeq = (struct nvme_queue *)PT_REGS_PARM1(ctx); + void *nvme_cmd = (void *)PT_REGS_PARM2(ctx); + bool kick = (bool)PT_REGS_PARM3(ctx); + struct blk_mq_tags *tags; + unsigned short tag; + struct request *req; + + if (!kick) + return; + + bpf_probe_read(&tags, sizeof(struct blk_mq_tags *), &nvmeq->tags); + bpf_probe_read(&tags, sizeof(struct blk_mq_tags *), tags); + bpf_probe_read(&tag, sizeof(unsigned short), (nvme_cmd + 2)); + + req = blk_mq_tag_to_rq(tags, tag); + if (!req) { + bpf_printk("kprobe_nvme_submit_cmd: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_ISSUE_DEVICE_POINT); +} +#endif +SEC("kprobe/blk_mq_complete_request") +int kprobe_blk_mq_complete_request(struct pt_regs *ctx) +{ + struct request *req = (struct request *)PT_REGS_PARM1(ctx); + + if (!req) { + bpf_printk("kprobe_blk_mq_complete_request: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); +} +char _license[] SEC("license") = "GPL"; + diff --git a/source/tools/detect/iosdiag/latency/iosdiag_scsi.bpf.c b/source/tools/detect/iosdiag/latency/iosdiag_scsi.bpf.c new file mode 100644 index 00000000..e136c434 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/iosdiag_scsi.bpf.c @@ -0,0 +1,31 @@ +#include "bpf_iosdiag_common.h" + +SEC("kprobe/scsi_dispatch_cmd") +int kprobe_scsi_dispatch_cmd(struct pt_regs *ctx) +{ + struct scsi_cmnd *cmd = (struct scsi_cmnd *)PT_REGS_PARM1(ctx); + struct request *req; + + bpf_probe_read(&req, sizeof(struct request *), &cmd->request); + if (!req) { + bpf_printk("kprobe_scsi_dispatch_cmd: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_ISSUE_DEVICE_POINT); +} + +SEC("kprobe/scsi_done") +int kprobe_scsi_done(struct pt_regs *ctx) +{ + struct scsi_cmnd *cmd = (struct scsi_cmnd *)PT_REGS_PARM1(ctx); + struct request *req; + + bpf_probe_read(&req, sizeof(struct request *), &cmd->request); + if (!req) { + bpf_printk("kprobe_scsi_done: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); +} +char _license[] SEC("license") = "GPL"; + diff --git a/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c b/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c new file mode 100644 index 00000000..1f801e14 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c @@ -0,0 +1,61 @@ +#include "bpf_iosdiag_common.h" + +struct bpf_map_def SEC("maps") iosdiag_virtblk_maps = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(pid_t), + .value_size = sizeof(unsigned long), + .max_entries = 20480, +}; + +SEC("kprobe/virtio_queue_rq") +int kprobe_virtio_queue_rq(struct pt_regs *ctx) +{ + struct blk_mq_queue_data *bd = + (struct blk_mq_queue_data *)PT_REGS_PARM2(ctx); + bool kick; + unsigned long req_addr; + pid_t pid = bpf_get_current_pid_tgid(); + + bpf_probe_read(&kick, sizeof(bool), &bd->last); + if (!kick) + return 0; + + bpf_probe_read(&req_addr, sizeof(struct request *), &bd->rq); + if (!req_addr) { + bpf_printk("kprobe_virtio_queue_rq: con't get request"); + return 0; + } + bpf_map_update_elem(&iosdiag_virtblk_maps, &pid, &req_addr, BPF_ANY); + return 0; +} + +SEC("kretprobe/virtio_queue_rq") +int kretprobe_virtio_queue_rq(struct pt_regs *ctx) +{ + int ret = PT_REGS_RC(ctx); + unsigned long *req_addr; + pid_t pid = bpf_get_current_pid_tgid(); + + if (!ret) { + req_addr = bpf_map_lookup_elem(&iosdiag_virtblk_maps, &pid); + if (!req_addr || !(*req_addr)) + return 0; + trace_io_driver_route(ctx, (struct request *)*req_addr, IO_ISSUE_DEVICE_POINT); + } + bpf_map_delete_elem(&iosdiag_virtblk_maps, &pid); + return 0; +} + +SEC("kprobe/blk_mq_complete_request") +int kprobe_blk_mq_complete_request(struct pt_regs *ctx) +{ + struct request *req = (struct request *)PT_REGS_PARM1(ctx); + + if (!req) { + bpf_printk("kprobe_blk_mq_complete_request: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); +} +char _license[] SEC("license") = "GPL"; + diff --git a/source/tools/detect/iosdiag/latency/main.c b/source/tools/detect/iosdiag/latency/main.c new file mode 100644 index 00000000..9ea70d9f --- /dev/null +++ b/source/tools/detect/iosdiag/latency/main.c @@ -0,0 +1,72 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +static void usage(void) +{ + fprintf(stdout, + "\nUsage: \n" + "latency [OPTION] disk_devname Detect IO latency in specified disk\n" + "latency -t ms disk_devname Set IO latency threshold(default 1000ms)\n" + "latency -T sec disk_devname How long to detect IO latency(default 10s)\n" + "latency -f log disk_devname Specify the output file log\n" + "\ne.g.\n" + "latency vda Detect IO latency in disk \"vda\"\n" + "latency -t 10 vda Set IO latency threshold 10ms and detect IO latency in disk \"vda\"\n" + "latency -t 10 -T 30 vda Detect IO latency in disk \"vda\" 30 secs\n"); + exit(-1); +} + +static unsigned long g_threshold_us; +unsigned long get_threshold_us(void) +{ + return g_threshold_us; +} + +int main(int argc, char *argv[]) +{ + int ch; + int timeout_s = 10, threshold_ms = 1000; + char *result_file = "/var/log/sysak/iosdiag/latency/result.log"; + char *devname; + + while ((ch = getopt(argc, argv, "T:t:f:h")) != -1) { + switch (ch) { + case 'T': + timeout_s = (unsigned int)strtoul(optarg, NULL, 0); + break; + case 't': + threshold_ms = (int)strtoul(optarg, NULL, 0); + break; + case 'f': + result_file = optarg; + break; + case 'h': + default: + usage(); + } + } + devname = argv[argc - 1]; + g_threshold_us = threshold_ms * 1000; + if (iosdiag_init(devname)) { + fprintf(stderr, "iosdiag_init fail\n"); + return -1; + } + iosdiag_run(timeout_s, result_file); + iosdiag_exit(devname); + return 0; +} + -- Gitee From f71082c2eb1b0dbb91c25b60e9f36b11b04859a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=85=89=E6=B0=B4?= Date: Thu, 23 Dec 2021 10:10:32 +0000 Subject: [PATCH 02/10] update source/tools/detect/iosdiag/README.md. --- source/tools/detect/iosdiag/README.md | 178 ++++++++++++++------------ 1 file changed, 93 insertions(+), 85 deletions(-) diff --git a/source/tools/detect/iosdiag/README.md b/source/tools/detect/iosdiag/README.md index 727457b4..44e60ef6 100644 --- a/source/tools/detect/iosdiag/README.md +++ b/source/tools/detect/iosdiag/README.md @@ -2,9 +2,9 @@ iosdiag (IO storage diagnostics tools), IO存储诊断工具,目前包括已实现的IO延迟诊断功能以及正在实现的IO夯诊断或其他功能等等 # 代码目录结构 -entry ------- IO诊断功能入口代码 -latency ------- IO延迟诊断代码 -data_analysis ------- IO诊断数据分析与结果输出代码 +entry IO诊断功能入口代码 +latency IO延迟诊断代码 +data_analysis IO诊断数据分析与结果输出代码 # 运行IO延迟诊断功能前置条件 由于基于eBPF实现,因此需要内核支持并启动eBPF @@ -14,17 +14,19 @@ data_analysis ------- IO诊断数据分析与结果输出代码 # 使用 ## 参数说明 -###sysak iosdiag -h +### sysak iosdiag -h +```c Usage: sysak iosdiag [options] subcmd [cmdargs]] - subcmd: +subcmd: latency, 执行io延迟诊断功能 - cmdargs: +cmdargs: -h, 跟在子命令之后显示功能支持参数 - options: +options: -u url, 指定url,将会通过curl命令把诊断日志文件打包上传到此url,不指定不上传 -s latency, 停止诊断 - -###sysak iosdiag latency -h +``` +### sysak iosdiag latency -h +```c Usage: latency [OPTION] disk_devname options: @@ -35,7 +37,7 @@ e.g. latency vda 诊断访问磁盘vda上耗时1000ms的IO,诊断10s后自动退出 latency -t 10 vda 诊断访问磁盘vda上耗时10ms的IO,诊断10s后自动退出 latency -t 10 -T 30 vda 诊断访问磁盘vda上耗时10ms的IO,诊断30s后自动退出 - +``` ## 输出说明 ### 控制台输出 #### 运行命令: @@ -44,114 +46,120 @@ sysak iosdiag latency -t 1 -T 20 vda start iosdiag_virtblk load bpf load iosdiag_virtblk bpf success running...done -#### 运行结果输出:两个维度,整体IO的延迟分布情况(以百分比展示整体IO的延迟点)+输出延迟最大的前TOPn个IO的最大延迟点以及总延迟 +#### 运行结果输出 +两个维度,整体IO的延迟分布情况(以百分比展示整体IO的延迟点)+输出延迟最大的前TOPn个IO的最大延迟点以及总延迟 +```c 15 IOs of disk vda over 1 ms, delay distribution: os(block) delay: 17.147% os(driver) delay: 0.009% disk delay: 82.84% os(complete) delay: 0.002% The first 10 IOs with the largest delay, more details: -seq comm pid iotype datalen abnormal(delay:totaldelay) +seq comm pid iotype datalen abnormal(delay:totaldelay) 11 kworker/u12:2 11943 W 4096 disk delay (145.56:256.88 ms) 12 kworker/u12:2 11943 W 4096 disk delay (145.46:256.66 ms) 15 kworker/u12:2 11943 W 4096 disk delay (217.39:217.51 ms) -14 jbd2/vda1-8 354 FWFS 4096 os(block) delay (143.42:152.93 ms) -13 kworker/u12:2 11943 W 4096 disk delay (145.05:145.30 ms) +14 jbd2/vda1-8 354 FWFS 4096 os(block) delay (143.42:152.93 ms) +13 kworker/u12:2 11943 W 4096 disk delay (145.05:145.30 ms) 3 kworker/u12:2 11943 W 4096 disk delay (113.80:114.00 ms) 5 kworker/u12:2 11943 W 8192 disk delay (112.97:113.14 ms) 1 kworker/u12:2 11943 W 4096 disk delay (111.79:111.96 ms) -10 kworker/u12:2 11943 W 8192 disk delay (111.62:111.78 ms) +10 kworker/u12:2 11943 W 8192 disk delay (111.62:111.78 ms) 4 kworker/u12:2 11943 W 4096 disk delay (111.11:111.30 ms) more details see /var/log/sysak/iosdiag/latency/result.log* - +``` ### 日志文件说明 日志文件中的数据均以磁盘为单位以json数组的方式呈现 #### /var/log/sysak/iosdiag/latency/result.log 该日志文件描述的是每一个延迟IO的事件信息,通过seq可以从result.log.seq文件中索引到IO的延迟信息 +```c { - "summary":[ //以磁盘为一个单位的数组 - { - "diskname":"vda", //磁盘盘符 - "slow ios":[ //每一个该磁盘下的io为一个单位的数组 - { - "seq":"11", //通过这个序号可以从result.log.seq中找到此IO的延迟分布 - "time":"Thu Dec 23 14:42:10 2021", //检测到次超时IO的时间 - "abnormal":"disk delay (145.56:256.88 ms)", //此IO的延迟最大的点(延迟最大的组件的延迟:总延迟) - "iotype":"W", //此IO类型 - "sector":23695488, //此IO访问磁盘的具体偏移位置 - "datalen":4096, //次IO访问磁盘的数据量 - "comm":"kworker/u12:2", //发起此IO的进程 - "pid":11943, //进程ID - "cpu":"2 -> 4 -> 4" //发起此IO的CPU -> 响应IO完成之后磁盘中断的CPU -> 磁盘IO完成后执行软中断的CPU - //如只显示一个CPU编号,说明发起IO和执行中断的CPU相同,要注意也有磁盘是没有软中断流程的 - }, - {第二个IO事件信息}, - ... - ] + "summary":[ //以磁盘为一个单位的数组 + { + "diskname":"vda", //磁盘盘符 + "slow ios":[ //每一个该磁盘下的io为一个单位的数组 + { + "seq":"11", //通过这个序号可以从result.log.seq中找到此IO的延迟分布 + "time":"Thu Dec 23 14:42:10 2021", //检测到次超时IO的时间 + "abnormal":"disk delay (145.56:256.88 ms)", //此IO的延迟最大的点(延迟最大的组件的延迟:总延迟) + "iotype":"W", //此IO类型 + "sector":23695488, //此IO访问磁盘的具体偏移位置 + "datalen":4096, //次IO访问磁盘的数据量 + "comm":"kworker/u12:2", //发起此IO的进程 + "pid":11943, //进程ID + "cpu":"2 -> 4 -> 4" //发起此IO的CPU -> 响应IO完成之后磁盘中断的CPU -> 磁盘IO完成后执行软中断的CPU + //如只显示一个CPU编号,说明发起IO和执行中断的CPU相同,要注意也有磁盘是没有软中断流程的 + }, + {第二个IO事件信息}, + ... + ] }, {第二个磁盘}, ... - ] + ] } +``` #### /var/log/sysak/iosdiag/latency/result.log.seq 该日志文件描述的是每一个延迟IO在各组建的延迟分布,通过seq可以从result.log文件中索引到IO的详细信息 +```c { - "summary":[ //以磁盘为一个单位的数组 - { - "diskname":"vda", //磁盘盘符 - "slow ios":[ //每一个该磁盘下的io延迟信息为一个单位的数组 + "summary":[ //以磁盘为一个单位的数组 { - "seq":"11", //通过这个序号可以从result.log中找到此IO的详细信息 - "totaldelay":256884, //此IO总耗时 - "delays":[ //以此IO的每个组件的延迟情况为单位的数组,目前涉及的组建为:block、driver、disk、complete - { - "component":"block", //组建名 - "delay":111300 //此IO在该组建的时延,单位us - }, - { - "component":"driver", - "delay":25 - }, - { - "component":"disk", - "delay":145557 - }, - { - "component":"complete", - "delay":2 - } - ] + "diskname":"vda", //磁盘盘符 + "slow ios":[ //每一个该磁盘下的io延迟信息为一个单位的数组 + { + "seq":11, //通过这个序号可以从result.log中找到此IO的详细信息 + "totaldelay":256884, //此IO总耗时 + "delays":[ //以此IO的每个组件的延迟情况为单位的数组,目前涉及的组建为:block、driver、disk、complete + { + "component":"block", //组建名 + "delay":111300 //此IO在该组建的时延,单位us + }, + { + "component":"driver", + "delay":25 + }, + { + "component":"disk", + "delay":145557 + }, + { + "component":"complete", + "delay":2 + } + ] + }, + {第二个IO延迟信息}, + ... + ] }, - {第二个IO延迟信息}, + {第二个磁盘}, ... - ] - }, - {第二个磁盘}, - ... - ] + ] } +``` #### /var/log/sysak/iosdiag/latency/result.log.stat +```c 该日志文件描述的是在磁盘角度,所有IO的延迟分布统计信息 { - "summary":[ //以磁盘为一个单位的数组 - { - "diskname":"vda", //磁盘盘符 - "delays":[ //以每个组件的延迟情况为单位的数组,目前涉及的组建为:block、driver、disk、complete + "summary":[ //以磁盘为一个单位的数组 { - "component":"os(block)", //组建名 - "percent":"17.147%", //在捕获的该磁盘的IO中,经统计在此组件耗时的百分比 - "max":143422, //在此组件的最大耗时,单位us - "min":76, //在此组件的最小耗时,单位us - "avg":24518 //在此组件的平均耗时,单位us + "diskname":"vda", //磁盘盘符 + "delays":[ //以每个组件的延迟情况为单位的数组,目前涉及的组建为:block、driver、disk、complete + { + "component":"os(block)", //组建名 + "percent":"17.147%", //在捕获的该磁盘的IO中,经统计在此组件耗时的百分比 + "max":143422, //在此组件的最大耗时,单位us + "min":76, //在此组件的最小耗时,单位us + "avg":24518 //在此组件的平均耗时,单位us + }, + {"os(driver)" 延迟情况}, + {"disk" 延迟情况}, + { "os(complete)" 延迟情况} + ] }, - {"os(driver)" 延迟情况}, - {"disk" 延迟情况}, - { "os(complete)" 延迟情况} - ] - }, - {第二个磁盘}, - ... - ] + {第二个磁盘}, + ... + ] } - - +``` \ No newline at end of file -- Gitee From df68a0059343c748f6f78554782dc2e4cf10381a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=85=89=E6=B0=B4?= Date: Thu, 23 Dec 2021 10:11:40 +0000 Subject: [PATCH 03/10] update source/tools/detect/iosdiag/README.md. --- source/tools/detect/iosdiag/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/tools/detect/iosdiag/README.md b/source/tools/detect/iosdiag/README.md index 44e60ef6..8a9197ac 100644 --- a/source/tools/detect/iosdiag/README.md +++ b/source/tools/detect/iosdiag/README.md @@ -139,8 +139,8 @@ more details see /var/log/sysak/iosdiag/latency/result.log* } ``` #### /var/log/sysak/iosdiag/latency/result.log.stat -```c 该日志文件描述的是在磁盘角度,所有IO的延迟分布统计信息 +```c { "summary":[ //以磁盘为一个单位的数组 { -- Gitee From cbf4afbe2b369d1cf6f0100662581d8c3019311c Mon Sep 17 00:00:00 2001 From: "guangshui.li" Date: Thu, 23 Dec 2021 18:54:02 +0800 Subject: [PATCH 04/10] iosdiag: Delete obsolete code Signed-off-by: guangshui.li --- .../detect/iosdiag/latency/iosdiag_nvme.bpf.c | 42 ------------------- 1 file changed, 42 deletions(-) diff --git a/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c b/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c index 38c149c4..4bb344b5 100644 --- a/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c +++ b/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c @@ -1,6 +1,5 @@ #include "bpf_iosdiag_common.h" -#if 1 SEC("kprobe/nvme_queue_rq") int kprobe_nvme_queue_rq(struct pt_regs *ctx) { @@ -21,47 +20,6 @@ int kprobe_nvme_queue_rq(struct pt_regs *ctx) return trace_io_driver_route(ctx, req, IO_ISSUE_DRIVER_POINT); } -#else -static struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) -{ - unsigned int nr_tags; - struct request *rqs, *rq; - - bpf_probe_read(&nr_tags, sizeof(unsigned int), &tags->nr_tags); - if (tag < nr_tags) { - bpf_probe_read(&rqs, sizeof(struct request *), &tags->rqs); - bpf_probe_read(&rq, sizeof(struct request *), - rqs + sizeof(struct request *) * tag); - return rq; - } - return NULL; -} - -SEC("kprobe/nvme_submit_cmd") -int kprobe_nvme_submit_cmd(struct pt_regs *ctx) -{ - struct nvme_queue *nvmeq = (struct nvme_queue *)PT_REGS_PARM1(ctx); - void *nvme_cmd = (void *)PT_REGS_PARM2(ctx); - bool kick = (bool)PT_REGS_PARM3(ctx); - struct blk_mq_tags *tags; - unsigned short tag; - struct request *req; - - if (!kick) - return; - - bpf_probe_read(&tags, sizeof(struct blk_mq_tags *), &nvmeq->tags); - bpf_probe_read(&tags, sizeof(struct blk_mq_tags *), tags); - bpf_probe_read(&tag, sizeof(unsigned short), (nvme_cmd + 2)); - - req = blk_mq_tag_to_rq(tags, tag); - if (!req) { - bpf_printk("kprobe_nvme_submit_cmd: con't get request"); - return 0; - } - return trace_io_driver_route(ctx, req, IO_ISSUE_DEVICE_POINT); -} -#endif SEC("kprobe/blk_mq_complete_request") int kprobe_blk_mq_complete_request(struct pt_regs *ctx) { -- Gitee From fa8d26fe367a0c9ba28b431bf175b38ce96d28e2 Mon Sep 17 00:00:00 2001 From: "guangshui.li" Date: Thu, 10 Feb 2022 11:04:55 +0800 Subject: [PATCH 05/10] iosdiag: Fix the bug that cannot capture IO Signed-off-by: guangshui.li --- source/lib/internal/ebpf/libbpf/src/libbpf.c | 20 ++--- .../iosdiag/latency/bpf_iosdiag_common.h | 34 ++++----- source/tools/detect/iosdiag/latency/collect.c | 76 ++++++++++--------- .../iosdiag/latency/iosdiag_virtblk.bpf.c | 2 +- 4 files changed, 69 insertions(+), 63 deletions(-) diff --git a/source/lib/internal/ebpf/libbpf/src/libbpf.c b/source/lib/internal/ebpf/libbpf/src/libbpf.c index ea3be660..d92a1cc2 100644 --- a/source/lib/internal/ebpf/libbpf/src/libbpf.c +++ b/source/lib/internal/ebpf/libbpf/src/libbpf.c @@ -2589,7 +2589,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) return 0; } -static int __attribute__((unused))bpf_object__sanitize_and_load_btf(struct bpf_object *obj) +static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) { struct btf *kern_btf = obj->btf; bool btf_mandatory, sanitize; @@ -6038,7 +6038,7 @@ patch_insn: } static int -__attribute__((unused))bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) +bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) { const struct btf_ext_info_sec *sec; const struct bpf_core_relo *rec; @@ -6570,14 +6570,14 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) size_t i; int err; - // if (obj->btf_ext) { - // err = bpf_object__relocate_core(obj, targ_btf_path); - // if (err) { - // pr_warn("failed to perform CO-RE relocations: %d\n", - // err); - // return err; - // } - // } + if (obj->btf_ext) { + err = bpf_object__relocate_core(obj, targ_btf_path); + if (err) { + pr_warn("failed to perform CO-RE relocations: %d\n", + err); + return err; + } + } /* relocate data references first for all programs and sub-programs, * as they don't change relative to code locations, so subsequent * subprogram processing won't need to re-calculate any of them diff --git a/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h index 85663a52..d11fdf83 100644 --- a/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h +++ b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h @@ -22,6 +22,13 @@ struct bpf_map_def SEC("maps") iosdiag_maps_targetdevt = { .max_entries = 1, }; +struct bpf_map_def SEC("maps") iosdiag_maps_notify = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = 0, +}; + static inline int iosdiag_pkg_check(void *data, unsigned int len) { return 1; @@ -40,9 +47,8 @@ static unsigned int get_target_devt(void) } static void -init_iosdiag_key(unsigned int dev, unsigned long sector, struct iosdiag_key *key) +init_iosdiag_key(unsigned long sector, struct iosdiag_key *key) { - key->dev = dev; key->sector = sector; } @@ -53,22 +59,11 @@ trace_io_driver_route(struct pt_regs *ctx, struct request *req, enum ioroute_typ struct iosdiag_req new_ioreq = {0}; struct iosdiag_key key = {0}; unsigned long long now = bpf_ktime_get_ns(); - dev_t dev; sector_t sector; struct gendisk *rq_disk; - struct hd_struct *part; - struct device *device; - u32 target_devt = get_target_devt(); - - bpf_probe_read(&part, sizeof(struct hd_struct *), &req->part); - bpf_probe_read(&device, sizeof(struct device *), &part->__dev); - bpf_probe_read(&dev, sizeof(dev_t), &device->devt); - if (target_devt && dev != target_devt) - return 0; bpf_probe_read(§or, sizeof(sector_t), &req->__sector); - - init_iosdiag_key(dev, sector, &key); + init_iosdiag_key(sector, &key); ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); if (ioreq) { if (!ioreq->ts[type]) @@ -107,7 +102,7 @@ static int tracepoint_block_getrq(struct block_getrq_args *args) return 0; new_ioreq.cpu[0] = new_ioreq.cpu[1] = new_ioreq.cpu[2] = -1; - init_iosdiag_key(args->dev, args->sector, &key); + init_iosdiag_key(args->sector, &key); if (pid) memcpy(new_ioreq.comm, args->comm, sizeof(args->comm)); new_ioreq.ts[IO_START_POINT] = now; @@ -144,7 +139,7 @@ static int tracepoint_block_rq_issue(struct block_rq_issue_args *args) if (target_devt && args->dev != target_devt) return 0; - init_iosdiag_key(args->dev, args->sector, &key); + init_iosdiag_key(args->sector, &key); ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); if (ioreq) { if (ioreq->ts[type]) @@ -179,11 +174,12 @@ static int tracepoint_block_rq_complete(struct block_rq_complete_args *args) struct iosdiag_key key = {0}; unsigned long long now = bpf_ktime_get_ns(); u32 target_devt = get_target_devt(); + int val = 1; if (target_devt && args->dev != target_devt) return 0; - init_iosdiag_key(args->dev, args->sector, &key); + init_iosdiag_key(args->sector, &key); ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); if (ioreq) { if (!ioreq->ts[IO_COMPLETE_TIME_POINT]) @@ -194,8 +190,10 @@ static int tracepoint_block_rq_complete(struct block_rq_complete_args *args) ioreq->cpu[2] = bpf_get_smp_processor_id(); } else return 0; - if (ioreq->complete) + if (ioreq->complete) { bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); + bpf_perf_event_output(args, &iosdiag_maps_notify, 0, &val, sizeof(val)); + } else bpf_map_delete_elem(&iosdiag_maps, &key); return 0; diff --git a/source/tools/detect/iosdiag/latency/collect.c b/source/tools/detect/iosdiag/latency/collect.c index 48a26aed..c3fbc3a7 100644 --- a/source/tools/detect/iosdiag/latency/collect.c +++ b/source/tools/detect/iosdiag/latency/collect.c @@ -29,7 +29,10 @@ DECLEAR_BPF_OBJ(iosdiag_scsi); static int iosdiag_map; static int iosdiag_virtblk_map; static int iosdiag_maps_targetdevt; +static int iosdiag_maps_notify; static int g_stop; +static int g_log_fd = -1; +static char *g_json_buf; extern unsigned long get_threshold_us(void); static int exec_shell_cmd(char *cmd) @@ -56,46 +59,51 @@ static int over_threshold(struct iosdiag_req *iop) unsigned long delay_ns = iop->ts[IO_COMPLETE_TIME_POINT] - iop->ts[IO_START_POINT]; - if (threshold_ns && delay_ns >= threshold_ns) + if (delay_ns >= threshold_ns) return 1; return 0; } -static void iosdiag_store_result(int fd) +static void iosdiag_store_result(void *ctx, int cpu, void *data, __u32 size) { struct iosdiag_key key, next_key; struct iosdiag_req iop; - unsigned long sleep_us = get_threshold_us() ? get_threshold_us() : 1000; - char *buf; - int i = 0; + char *buf = g_json_buf; unsigned int seq = 0; + int fd = g_log_fd; - printf("running..."); - fflush(stdout); - buf = malloc(JSON_BUFFER_SIZE); - memset(buf, 0x0, JSON_BUFFER_SIZE); - while (!g_stop) { - if (bpf_map_get_next_key(iosdiag_map, &key, &next_key) == 0) { - bpf_map_lookup_elem(iosdiag_map, &next_key, &iop); - if (iop.complete) { - if (over_threshold(&iop)) { - seq++; - set_check_time_date(); - summary_convert_to_json(buf, &iop, seq); - delay_convert_to_json(buf + strlen(buf), &iop, seq); - write(fd, buf, strlen(buf)); - } - bpf_map_delete_elem(iosdiag_map, &next_key); - } - key = next_key; - if (i++ > 50) { - usleep(sleep_us); - i = 0; + while (bpf_map_get_next_key(iosdiag_map, &key, &next_key) == 0) { + bpf_map_lookup_elem(iosdiag_map, &next_key, &iop); + if (iop.complete) { + if (over_threshold(&iop)) { + seq++; + set_check_time_date(); + summary_convert_to_json(buf, &iop, seq); + delay_convert_to_json(buf + strlen(buf), &iop, seq); + write(fd, buf, strlen(buf)); } - } else - usleep(sleep_us); + bpf_map_delete_elem(iosdiag_map, &next_key); + } + key = next_key; } - free(buf); +} + +static void iosdiag_collect(void) +{ + struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb; + + pb_opts.sample_cb = iosdiag_store_result; + pb = perf_buffer__new(iosdiag_maps_notify, 1, &pb_opts); + + printf("running..."); + fflush(stdout); + g_json_buf = malloc(JSON_BUFFER_SIZE); + memset(g_json_buf, 0x0, JSON_BUFFER_SIZE); + while (!g_stop) + perf_buffer__poll(pb, 100); + perf_buffer__free(pb); + free(g_json_buf); printf("done\n"); } @@ -139,6 +147,7 @@ static void iosdiag_stop(int signo) } \ if (load_map) { \ iosdiag_map = bpf_map__fd(name->maps.iosdiag_maps); \ + iosdiag_maps_notify = bpf_map__fd(name->maps.iosdiag_maps_notify); \ iosdiag_maps_targetdevt = bpf_map__fd(name->maps.iosdiag_maps_targetdevt); \ } \ if (!__ret) \ @@ -237,7 +246,6 @@ int iosdiag_init(char *devname) int iosdiag_run(int timeout, char *output_file) { - int fd_log; char filepath[256]; char cmd[272]; @@ -249,8 +257,8 @@ int iosdiag_run(int timeout, char *output_file) strcpy(filepath, output_file); sprintf(cmd, "mkdir %s -p", dirname(filepath)); exec_shell_cmd(cmd); - fd_log = open(output_file, O_RDWR | O_CREAT, 0755); - if (fd_log < 0) { + g_log_fd = open(output_file, O_RDWR | O_CREAT, 0755); + if (g_log_fd < 0) { printf("error: create output file \"%s\" fail\n", output_file); return -1; } @@ -258,8 +266,8 @@ int iosdiag_run(int timeout, char *output_file) signal(SIGALRM, iosdiag_stop); if (timeout) alarm(timeout); - iosdiag_store_result(fd_log); - close(fd_log); + iosdiag_collect(); + close(g_log_fd); return 0; } diff --git a/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c b/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c index 1f801e14..cd190921 100644 --- a/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c +++ b/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c @@ -4,7 +4,7 @@ struct bpf_map_def SEC("maps") iosdiag_virtblk_maps = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(pid_t), .value_size = sizeof(unsigned long), - .max_entries = 20480, + .max_entries = 2048, }; SEC("kprobe/virtio_queue_rq") -- Gitee From cb5ae2d5d99a8b765d6bef2787fea523d7f86ce3 Mon Sep 17 00:00:00 2001 From: "guangshui.li" Date: Thu, 10 Feb 2022 11:27:09 +0800 Subject: [PATCH 06/10] iosdiag: fix conflict Signed-off-by: guangshui.li --- source/tools/detect/iosdiag/latency/collect.c | 59 ------------------- 1 file changed, 59 deletions(-) diff --git a/source/tools/detect/iosdiag/latency/collect.c b/source/tools/detect/iosdiag/latency/collect.c index 79955082..c3fbc3a7 100644 --- a/source/tools/detect/iosdiag/latency/collect.c +++ b/source/tools/detect/iosdiag/latency/collect.c @@ -59,16 +59,11 @@ static int over_threshold(struct iosdiag_req *iop) unsigned long delay_ns = iop->ts[IO_COMPLETE_TIME_POINT] - iop->ts[IO_START_POINT]; -<<<<<<< HEAD if (delay_ns >= threshold_ns) -======= - if (threshold_ns && delay_ns >= threshold_ns) ->>>>>>> 6ffe786162b71efcc0f3e065a8e97d9cbad033f8 return 1; return 0; } -<<<<<<< HEAD static void iosdiag_store_result(void *ctx, int cpu, void *data, __u32 size) { struct iosdiag_key key, next_key; @@ -109,43 +104,6 @@ static void iosdiag_collect(void) perf_buffer__poll(pb, 100); perf_buffer__free(pb); free(g_json_buf); -======= -static void iosdiag_store_result(int fd) -{ - struct iosdiag_key key, next_key; - struct iosdiag_req iop; - unsigned long sleep_us = get_threshold_us() ? get_threshold_us() : 1000; - char *buf; - int i = 0; - unsigned int seq = 0; - - printf("running..."); - fflush(stdout); - buf = malloc(JSON_BUFFER_SIZE); - memset(buf, 0x0, JSON_BUFFER_SIZE); - while (!g_stop) { - if (bpf_map_get_next_key(iosdiag_map, &key, &next_key) == 0) { - bpf_map_lookup_elem(iosdiag_map, &next_key, &iop); - if (iop.complete) { - if (over_threshold(&iop)) { - seq++; - set_check_time_date(); - summary_convert_to_json(buf, &iop, seq); - delay_convert_to_json(buf + strlen(buf), &iop, seq); - write(fd, buf, strlen(buf)); - } - bpf_map_delete_elem(iosdiag_map, &next_key); - } - key = next_key; - if (i++ > 50) { - usleep(sleep_us); - i = 0; - } - } else - usleep(sleep_us); - } - free(buf); ->>>>>>> 6ffe786162b71efcc0f3e065a8e97d9cbad033f8 printf("done\n"); } @@ -189,10 +147,7 @@ static void iosdiag_stop(int signo) } \ if (load_map) { \ iosdiag_map = bpf_map__fd(name->maps.iosdiag_maps); \ -<<<<<<< HEAD iosdiag_maps_notify = bpf_map__fd(name->maps.iosdiag_maps_notify); \ -======= ->>>>>>> 6ffe786162b71efcc0f3e065a8e97d9cbad033f8 iosdiag_maps_targetdevt = bpf_map__fd(name->maps.iosdiag_maps_targetdevt); \ } \ if (!__ret) \ @@ -291,10 +246,6 @@ int iosdiag_init(char *devname) int iosdiag_run(int timeout, char *output_file) { -<<<<<<< HEAD -======= - int fd_log; ->>>>>>> 6ffe786162b71efcc0f3e065a8e97d9cbad033f8 char filepath[256]; char cmd[272]; @@ -306,13 +257,8 @@ int iosdiag_run(int timeout, char *output_file) strcpy(filepath, output_file); sprintf(cmd, "mkdir %s -p", dirname(filepath)); exec_shell_cmd(cmd); -<<<<<<< HEAD g_log_fd = open(output_file, O_RDWR | O_CREAT, 0755); if (g_log_fd < 0) { -======= - fd_log = open(output_file, O_RDWR | O_CREAT, 0755); - if (fd_log < 0) { ->>>>>>> 6ffe786162b71efcc0f3e065a8e97d9cbad033f8 printf("error: create output file \"%s\" fail\n", output_file); return -1; } @@ -320,13 +266,8 @@ int iosdiag_run(int timeout, char *output_file) signal(SIGALRM, iosdiag_stop); if (timeout) alarm(timeout); -<<<<<<< HEAD iosdiag_collect(); close(g_log_fd); -======= - iosdiag_store_result(fd_log); - close(fd_log); ->>>>>>> 6ffe786162b71efcc0f3e065a8e97d9cbad033f8 return 0; } -- Gitee From 7afb8af3cde86a78b47219ddf3094a4b42161711 Mon Sep 17 00:00:00 2001 From: "guangshui.li" Date: Thu, 10 Feb 2022 19:02:47 +0800 Subject: [PATCH 07/10] iosdiag: Modify times to msecs Signed-off-by: guangshui.li --- .../detect/iosdiag/latency/format_json.c | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/source/tools/detect/iosdiag/latency/format_json.c b/source/tools/detect/iosdiag/latency/format_json.c index a01a7528..ac3d92ed 100644 --- a/source/tools/detect/iosdiag/latency/format_json.c +++ b/source/tools/detect/iosdiag/latency/format_json.c @@ -38,7 +38,7 @@ struct ts_info g_delays[] = { {"complete", IO_COMPLETE_TIME_POINT}, }; -static char *g_check_date; +static char g_check_date[24]; static char *point_idx_to_str(int idx) { @@ -64,14 +64,19 @@ static char *delay_idx_to_str(int idx) void set_check_time_date(void) { - time_t t; - struct tm *date; - - t = time(NULL); - - date = localtime(&t); - g_check_date = asctime(date); - g_check_date[24] = '\0'; + struct timeval tv; + struct tm *p; + + gettimeofday(&tv, NULL); + p = localtime(&tv.tv_sec); + sprintf(g_check_date, "%d-%d-%d %d:%d:%d.%ld", + 1900+p->tm_year, + 1+p->tm_mon, + p->tm_mday, + p->tm_hour, + p->tm_min, + p->tm_sec, + tv.tv_usec / 1000); } static char *get_check_time_date(void) -- Gitee From b8bc5675107614d84f080a13bd17c6f24d338108 Mon Sep 17 00:00:00 2001 From: "guangshui.li" Date: Fri, 11 Feb 2022 15:33:10 +0800 Subject: [PATCH 08/10] iosdiag: Fix the bug that cannot capture IO Signed-off-by: guangshui.li --- .../iosdiag/latency/bpf_iosdiag_common.h | 19 +++++++-------- source/tools/detect/iosdiag/latency/collect.c | 24 ++++++------------- .../detect/iosdiag/latency/format_json.c | 18 +++++++------- .../detect/iosdiag/latency/format_json.h | 6 ++--- 4 files changed, 26 insertions(+), 41 deletions(-) diff --git a/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h index f42d74e4..baf37839 100644 --- a/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h +++ b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h @@ -25,8 +25,7 @@ struct bpf_map_def SEC("maps") iosdiag_maps_targetdevt = { struct bpf_map_def SEC("maps") iosdiag_maps_notify = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = 0, + .value_size = sizeof(int), }; static inline int iosdiag_pkg_check(void *data, unsigned int len) @@ -170,11 +169,11 @@ SEC("tracepoint/block/block_rq_complete") static int tracepoint_block_rq_complete(struct block_rq_complete_args *args) { struct iosdiag_req *ioreq; - struct iosdiag_req new_ioreq = {0}; + struct iosdiag_req data = {0}; struct iosdiag_key key = {0}; unsigned long long now = bpf_ktime_get_ns(); u32 target_devt = get_target_devt(); - int val = 1; + int complete = 0; if (target_devt && args->dev != target_devt) return 0; @@ -186,17 +185,15 @@ static int tracepoint_block_rq_complete(struct block_rq_complete_args *args) ioreq->ts[IO_COMPLETE_TIME_POINT] = now; if (ioreq->ts[IO_ISSUE_DEVICE_POINT] && ioreq->ts[IO_RESPONCE_DRIVER_POINT]) - ioreq->complete = 1; + complete = 1; ioreq->cpu[2] = bpf_get_smp_processor_id(); } else return 0; - - if (ioreq->complete) { - bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); - bpf_perf_event_output(args, &iosdiag_maps_notify, 0, &val, sizeof(val)); + if (complete) { + memcpy(&data, ioreq, sizeof(data)); + bpf_perf_event_output(args, &iosdiag_maps_notify, 0xffffffffULL, &data, sizeof(data)); } - else - bpf_map_delete_elem(&iosdiag_maps, &key); + bpf_map_delete_elem(&iosdiag_maps, &key); return 0; } #endif diff --git a/source/tools/detect/iosdiag/latency/collect.c b/source/tools/detect/iosdiag/latency/collect.c index c3fbc3a7..24b355ce 100644 --- a/source/tools/detect/iosdiag/latency/collect.c +++ b/source/tools/detect/iosdiag/latency/collect.c @@ -66,25 +66,15 @@ static int over_threshold(struct iosdiag_req *iop) static void iosdiag_store_result(void *ctx, int cpu, void *data, __u32 size) { - struct iosdiag_key key, next_key; - struct iosdiag_req iop; + struct iosdiag_req *iop = (struct iosdiag_req *)data; char *buf = g_json_buf; - unsigned int seq = 0; int fd = g_log_fd; - while (bpf_map_get_next_key(iosdiag_map, &key, &next_key) == 0) { - bpf_map_lookup_elem(iosdiag_map, &next_key, &iop); - if (iop.complete) { - if (over_threshold(&iop)) { - seq++; - set_check_time_date(); - summary_convert_to_json(buf, &iop, seq); - delay_convert_to_json(buf + strlen(buf), &iop, seq); - write(fd, buf, strlen(buf)); - } - bpf_map_delete_elem(iosdiag_map, &next_key); - } - key = next_key; + if (over_threshold(iop)) { + set_check_time_date(); + summary_convert_to_json(buf, iop); + delay_convert_to_json(buf + strlen(buf), iop); + write(fd, buf, strlen(buf)); } } @@ -109,7 +99,7 @@ static void iosdiag_collect(void) static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) { - //return vfprintf(stderr, format, args); + //return vfprintf(stderr, format, args); return 0; } diff --git a/source/tools/detect/iosdiag/latency/format_json.c b/source/tools/detect/iosdiag/latency/format_json.c index ac3d92ed..ac2094d8 100644 --- a/source/tools/detect/iosdiag/latency/format_json.c +++ b/source/tools/detect/iosdiag/latency/format_json.c @@ -126,15 +126,15 @@ static int is_disk_delay(struct iosdiag_req *iop) return 1; } -void point_convert_to_json(void *dest, void *src, unsigned int seq) +void point_convert_to_json(void *dest, void *src) { int i; struct iosdiag_req *iop = src; sprintf(dest, - "{\"seq\":\"%u\"," + "{\"time\":\"%s\"," "\"diskname\":\"%s\"," - "\"points\":[", seq, iop->diskname); + "\"points\":[", get_check_time_date(), iop->diskname); for (i = 0; i < MAX_POINT; i++) { if (!iop->ts[i]) continue; @@ -147,7 +147,7 @@ void point_convert_to_json(void *dest, void *src, unsigned int seq) sprintf(dest + strlen(dest), "%s", "]}\n"); } -void delay_convert_to_json(void *dest, void *src, unsigned int seq) +void delay_convert_to_json(void *dest, void *src) { int i, n; int skip = 0; @@ -155,9 +155,9 @@ void delay_convert_to_json(void *dest, void *src, unsigned int seq) struct iosdiag_req *iop = src; sprintf(dest, - "{\"seq\":\"%u\"," + "{\"time\":\"%s\"," "\"diskname\":\"%s\",", - seq, + get_check_time_date(), iop->diskname); for (i = 0, n = 0; i < MAX_POINT; i++) { if (i == IO_START_POINT) { @@ -188,7 +188,7 @@ void delay_convert_to_json(void *dest, void *src, unsigned int seq) sprintf(dest + strlen(dest), "%s", "]}\n"); } -void summary_convert_to_json(void *dest, void *src, unsigned int seq) +void summary_convert_to_json(void *dest, void *src) { char cpu[24] = {0}; char component[16] = {0}; @@ -209,8 +209,7 @@ void summary_convert_to_json(void *dest, void *src, unsigned int seq) iop->cpu[0], iop->cpu[1], iop->cpu[2]); //blk_rq_op_name(iop->cmd_flags, buf, sizeof(buf)); sprintf(dest, - "{\"seq\":\"%u\"," - "\"time\":\"%s\"," + "{\"time\":\"%s\"," "\"abnormal\":\"%s delay (%lu:%lu us)\"," "\"diskname\":\"%s\"," "\"iotype\":\"%s\"," @@ -219,7 +218,6 @@ void summary_convert_to_json(void *dest, void *src, unsigned int seq) "\"comm\":\"%s\"," "\"pid\":%d," "\"cpu\":\"%s\"}\n", - seq, get_check_time_date(), maxdelay_component, max_delay, diff --git a/source/tools/detect/iosdiag/latency/format_json.h b/source/tools/detect/iosdiag/latency/format_json.h index 63de88df..ece2b8c9 100644 --- a/source/tools/detect/iosdiag/latency/format_json.h +++ b/source/tools/detect/iosdiag/latency/format_json.h @@ -3,8 +3,8 @@ #define JSON_BUFFER_SIZE 4096 void set_check_time_date(void); -void summary_convert_to_json(void *dest, void *src, unsigned int seq); -void delay_convert_to_json(void *dest, void *src, unsigned int seq); -void point_convert_to_json(void *dest, void *src, unsigned int seq); +void summary_convert_to_json(void *dest, void *src); +void delay_convert_to_json(void *dest, void *src); +void point_convert_to_json(void *dest, void *src); #endif -- Gitee From 557ed889e9229b7f8ff24b35c4a4663916c9bfcc Mon Sep 17 00:00:00 2001 From: "guangshui.li" Date: Fri, 11 Feb 2022 15:56:38 +0800 Subject: [PATCH 09/10] iosdiag: Fix compile warning Signed-off-by: guangshui.li --- source/tools/detect/iosdiag/latency/format_json.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/tools/detect/iosdiag/latency/format_json.c b/source/tools/detect/iosdiag/latency/format_json.c index ac2094d8..259ae9a5 100644 --- a/source/tools/detect/iosdiag/latency/format_json.c +++ b/source/tools/detect/iosdiag/latency/format_json.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "iosdiag.h" #include "format_json.h" -- Gitee From 46ec3a828e3f8245fd4ebb4ffc5b3ea486c58bef Mon Sep 17 00:00:00 2001 From: "guangshui.li" Date: Fri, 11 Feb 2022 16:13:29 +0800 Subject: [PATCH 10/10] iosdiag: Fix the bug that cannot capture IO Signed-off-by: guangshui.li --- .../data_analysis/iosdiag_data_analysis.py | 34 +++++-------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py b/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py index 53279b64..4a1410df 100755 --- a/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py +++ b/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py @@ -25,17 +25,6 @@ def humConvert(value): return "%.2f%s/s" % (value, units[i]) value = value / size -def timeConvert(value, withUnit): - units = ["us", "ms", "s"] - size = 1000.0 - for i in range(len(units)): - if (value / size) < 1: - if withUnit: - return "%.2f %s" % (value, units[i]) - else: - return "%.2f" % (value) - value = value / size - class latencyAnalysis: def __init__(self): self.delayStatDicts = {} @@ -112,9 +101,9 @@ class latencyAnalysis: diskIdx = diskIdxDicts[disk] del sDict['diskname'] listAbnormal=[i for i in sDict['abnormal'].split(' ') if i != '']; - delay=timeConvert(int(listAbnormal[-2].strip('(').split(':')[0]), False) - totalDelay=timeConvert(int(listAbnormal[-2].strip('(').split(':')[1]), True) - sDict['abnormal']=listAbnormal[0]+' '+listAbnormal[1]+" ("+delay+":"+totalDelay+")" + msDelay=int(listAbnormal[-2].strip('(').split(':')[0]) / 1000.000 + msTotalDelay=int(listAbnormal[-2].strip('(').split(':')[1]) / 1000.000 + sDict['abnormal']=listAbnormal[0]+' '+listAbnormal[1]+" ("+str(msDelay)+":"+str(msTotalDelay)+" ms)" summaryDicts['summary'][diskIdx]['slow ios'].append(sDict) def processOneLatencySeq(self, sDict): @@ -153,15 +142,6 @@ class latencyAnalysis: maxPercent = percent delayStatDicts['summary'][diskIdx]['delays'][idx]['percent'] = str(percent)+"%" - summaryDicts['summary'][diskIdx]['slow ios']=\ - sorted(summaryDicts['summary'][diskIdx]['slow ios'],\ - key=lambda e:float(re.split(':| ', e['abnormal'])[-2]),\ - reverse=True) - delayDicts['summary'][diskIdx]['slow ios']=\ - sorted(delayDicts['summary'][diskIdx]['slow ios'],\ - key=lambda e:e['totaldelay'],\ - reverse=True) - def latencyPrint(self, threshold): diskIdxDicts = self.diskIdxDicts totalIosDicts = self.totalIosDicts @@ -171,6 +151,10 @@ class latencyAnalysis: for disk, diskIdx in diskIdxDicts.items(): totalIos = totalIosDicts[disk] + summaryDicts['summary'][diskIdx]['slow ios']=\ + sorted(summaryDicts['summary'][diskIdx]['slow ios'],\ + key=lambda e:float(re.split(':| ', e['abnormal'])[-2]),\ + reverse=True) print("\n%d IOs of disk %s over %d ms, delay distribution:" %(totalIos, disk, threshold)) for component,idx in componentDicts.items(): percent = delayStatDicts['summary'][diskIdx]['delays'][idx]['percent'] @@ -178,12 +162,12 @@ class latencyAnalysis: end = totalIos if totalIos < 10 else 10 print("The first %d IOs with the largest delay, more details:" % end) - print("seq".ljust(6)+"comm".ljust(20)+"pid".ljust(10)+"iotype".ljust(8)+\ + print("time".ljust(26)+"comm".ljust(20)+"pid".ljust(10)+"iotype".ljust(8)+\ "datalen".ljust(16)+"abnormal(delay:totaldelay)".ljust(40)) for i in range(0,end): eDict=summaryDicts['summary'][diskIdx]['slow ios'][i] - print(str(eDict["seq"]).ljust(6)+eDict["comm"].ljust(20)+\ + print(str(eDict["time"]).ljust(26)+eDict["comm"].ljust(20)+\ str(eDict["pid"]).ljust(10)+eDict["iotype"].ljust(8)+\ str(eDict["datalen"]).ljust(16)+eDict["abnormal"].ljust(40)) -- Gitee