diff --git a/source/tools/detect/iosdiag/Makefile b/source/tools/detect/iosdiag/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4d1f7df8b9d50cbfdd7921989e6d6c77fc293faa --- /dev/null +++ b/source/tools/detect/iosdiag/Makefile @@ -0,0 +1,3 @@ +target := iosdiag + +include $(SRC)/mk/sub.mk diff --git a/source/tools/detect/iosdiag/README.md b/source/tools/detect/iosdiag/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8a9197ac1e51a0330a4d05fc8f77d9c73f96527a --- /dev/null +++ b/source/tools/detect/iosdiag/README.md @@ -0,0 +1,165 @@ +# iosdiag +iosdiag (IO storage diagnostics tools), IO存储诊断工具,目前包括已实现的IO延迟诊断功能以及正在实现的IO夯诊断或其他功能等等 + +# 代码目录结构 +entry IO诊断功能入口代码 +latency IO延迟诊断代码 +data_analysis IO诊断数据分析与结果输出代码 + +# 运行IO延迟诊断功能前置条件 +由于基于eBPF实现,因此需要内核支持并启动eBPF + +# 编译 +在编译sysak的之前,需要在执行configure配置的时候加上--enable-libbpf --enable-target-iosdiag才能编译进sysak + +# 使用 +## 参数说明 +### sysak iosdiag -h +```c +Usage: sysak iosdiag [options] subcmd [cmdargs]] +subcmd: + latency, 执行io延迟诊断功能 +cmdargs: + -h, 跟在子命令之后显示功能支持参数 +options: + -u url, 指定url,将会通过curl命令把诊断日志文件打包上传到此url,不指定不上传 + -s latency, 停止诊断 +``` +### sysak iosdiag latency -h +```c +Usage: latency [OPTION] disk_devname + +options: + -t threshold, 指定超时IO的时间阈值(单位ms),IO时延诊断将过滤完成耗时超过此阈值的IO(默认1000ms) + -T time, 指定诊断运行时长(单位秒)后自动退出(默认10秒) + +e.g. + latency vda 诊断访问磁盘vda上耗时1000ms的IO,诊断10s后自动退出 + latency -t 10 vda 诊断访问磁盘vda上耗时10ms的IO,诊断10s后自动退出 + latency -t 10 -T 30 vda 诊断访问磁盘vda上耗时10ms的IO,诊断30s后自动退出 +``` +## 输出说明 +### 控制台输出 +#### 运行命令: +sysak iosdiag latency -t 1 -T 20 vda +#### 运行过程日志: +start iosdiag_virtblk load bpf +load iosdiag_virtblk bpf success +running...done +#### 运行结果输出 +两个维度,整体IO的延迟分布情况(以百分比展示整体IO的延迟点)+输出延迟最大的前TOPn个IO的最大延迟点以及总延迟 +```c +15 IOs of disk vda over 1 ms, delay distribution: +os(block) delay: 17.147% +os(driver) delay: 0.009% +disk delay: 82.84% +os(complete) delay: 0.002% +The first 10 IOs with the largest delay, more details: +seq comm pid iotype datalen abnormal(delay:totaldelay) +11 kworker/u12:2 11943 W 4096 disk delay (145.56:256.88 ms) +12 kworker/u12:2 11943 W 4096 disk delay (145.46:256.66 ms) +15 kworker/u12:2 11943 W 4096 disk delay (217.39:217.51 ms) +14 jbd2/vda1-8 354 FWFS 4096 os(block) delay (143.42:152.93 ms) +13 kworker/u12:2 11943 W 4096 disk delay (145.05:145.30 ms) +3 kworker/u12:2 11943 W 4096 disk delay (113.80:114.00 ms) +5 kworker/u12:2 11943 W 8192 disk delay (112.97:113.14 ms) +1 kworker/u12:2 11943 W 4096 disk delay (111.79:111.96 ms) +10 kworker/u12:2 11943 W 8192 disk delay (111.62:111.78 ms) +4 kworker/u12:2 11943 W 4096 disk delay (111.11:111.30 ms) +more details see /var/log/sysak/iosdiag/latency/result.log* +``` +### 日志文件说明 +日志文件中的数据均以磁盘为单位以json数组的方式呈现 +#### /var/log/sysak/iosdiag/latency/result.log +该日志文件描述的是每一个延迟IO的事件信息,通过seq可以从result.log.seq文件中索引到IO的延迟信息 +```c +{ + "summary":[ //以磁盘为一个单位的数组 + { + "diskname":"vda", //磁盘盘符 + "slow ios":[ //每一个该磁盘下的io为一个单位的数组 + { + "seq":"11", //通过这个序号可以从result.log.seq中找到此IO的延迟分布 + "time":"Thu Dec 23 14:42:10 2021", //检测到次超时IO的时间 + "abnormal":"disk delay (145.56:256.88 ms)", //此IO的延迟最大的点(延迟最大的组件的延迟:总延迟) + "iotype":"W", //此IO类型 + "sector":23695488, //此IO访问磁盘的具体偏移位置 + "datalen":4096, //次IO访问磁盘的数据量 + "comm":"kworker/u12:2", //发起此IO的进程 + "pid":11943, //进程ID + "cpu":"2 -> 4 -> 4" //发起此IO的CPU -> 响应IO完成之后磁盘中断的CPU -> 磁盘IO完成后执行软中断的CPU + //如只显示一个CPU编号,说明发起IO和执行中断的CPU相同,要注意也有磁盘是没有软中断流程的 + }, + {第二个IO事件信息}, + ... + ] + }, + {第二个磁盘}, + ... + ] +} +``` +#### /var/log/sysak/iosdiag/latency/result.log.seq +该日志文件描述的是每一个延迟IO在各组建的延迟分布,通过seq可以从result.log文件中索引到IO的详细信息 +```c +{ + "summary":[ //以磁盘为一个单位的数组 + { + "diskname":"vda", //磁盘盘符 + "slow ios":[ //每一个该磁盘下的io延迟信息为一个单位的数组 + { + "seq":11, //通过这个序号可以从result.log中找到此IO的详细信息 + "totaldelay":256884, //此IO总耗时 + "delays":[ //以此IO的每个组件的延迟情况为单位的数组,目前涉及的组建为:block、driver、disk、complete + { + "component":"block", //组建名 + "delay":111300 //此IO在该组建的时延,单位us + }, + { + "component":"driver", + "delay":25 + }, + { + "component":"disk", + "delay":145557 + }, + { + "component":"complete", + "delay":2 + } + ] + }, + {第二个IO延迟信息}, + ... + ] + }, + {第二个磁盘}, + ... + ] +} +``` +#### /var/log/sysak/iosdiag/latency/result.log.stat +该日志文件描述的是在磁盘角度,所有IO的延迟分布统计信息 +```c +{ + "summary":[ //以磁盘为一个单位的数组 + { + "diskname":"vda", //磁盘盘符 + "delays":[ //以每个组件的延迟情况为单位的数组,目前涉及的组建为:block、driver、disk、complete + { + "component":"os(block)", //组建名 + "percent":"17.147%", //在捕获的该磁盘的IO中,经统计在此组件耗时的百分比 + "max":143422, //在此组件的最大耗时,单位us + "min":76, //在此组件的最小耗时,单位us + "avg":24518 //在此组件的平均耗时,单位us + }, + {"os(driver)" 延迟情况}, + {"disk" 延迟情况}, + { "os(complete)" 延迟情况} + ] + }, + {第二个磁盘}, + ... + ] +} +``` \ No newline at end of file diff --git a/source/tools/detect/iosdiag/data_analysis/Makefile b/source/tools/detect/iosdiag/data_analysis/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..e2dba02433005b483889d4a48dbb09c5b46ff8e0 --- /dev/null +++ b/source/tools/detect/iosdiag/data_analysis/Makefile @@ -0,0 +1,5 @@ +target = iosdiag_data_analysis + +mods += $(target) + +include $(SRC)/mk/py.mk diff --git a/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py b/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py new file mode 100755 index 0000000000000000000000000000000000000000..53279b64d813462e42f693d732bb92c8a0b3ef98 --- /dev/null +++ b/source/tools/detect/iosdiag/data_analysis/iosdiag_data_analysis.py @@ -0,0 +1,240 @@ +# -*- coding: utf-8 -*- + +import os +import json +import string +from collections import OrderedDict +import argparse +import re + +if os.geteuid() != 0: + print "This program must be run as root. Aborting." + sys.exit(0) + +def execCmd(cmd): + r = os.popen(cmd) + text = r.read() + r.close() + return text + +def humConvert(value): + units = ["B", "KB", "MB", "GB", "TB", "PB"] + size = 1024.0 + for i in range(len(units)): + if (value / size) < 1: + return "%.2f%s/s" % (value, units[i]) + value = value / size + +def timeConvert(value, withUnit): + units = ["us", "ms", "s"] + size = 1000.0 + for i in range(len(units)): + if (value / size) < 1: + if withUnit: + return "%.2f %s" % (value, units[i]) + else: + return "%.2f" % (value) + value = value / size + +class latencyAnalysis: + def __init__(self): + self.delayStatDicts = {} + self.delayDicts = {} + self.summaryDicts = {} + self.totalIosDicts = {} + self.totalDelayDicts = {} + self.diskIdxDicts = {} + self.totalDiskCnt = 0 + self.threshold = 0 + self.componentDicts = OrderedDict([('os(block)',0),('os(driver)',1),\ + ('disk',2),('os(complete)',3)]) + self.delayStatJsonStr = \ + '{ \ + "diskname":"","delays":[ \ + {"component":"os(block)","percent":"","max":0,"min":1000000000,"avg":0},\ + {"component":"os(driver)","percent":"","max":0,"min":1000000000,"avg":0},\ + {"component":"disk","percent":"","max":0,"min":1000000000,"avg":0}, \ + {"component":"os(complete)","percent":"","max":0,"min":1000000000,"avg":0}]\ + }' + newDelayStatDict = json.loads("["+self.delayStatJsonStr + "]", object_pairs_hook=OrderedDict) + self.delayStatDicts.setdefault('summary', newDelayStatDict) + self.entryDictJsonStr = \ + '{ \ + "diskname":"",\ + "slow ios":[] \ + }' + newSummaryDict = json.loads("["+self.entryDictJsonStr + "]", object_pairs_hook=OrderedDict) + self.summaryDicts.setdefault('summary', newSummaryDict) + newDelayDict = json.loads("["+self.entryDictJsonStr + "]", object_pairs_hook=OrderedDict) + self.delayDicts.setdefault('summary', newDelayDict) + + def __newDiskDict(self, disk): + if self.totalDiskCnt != 0: + newDelayStatDict = json.loads(self.delayStatJsonStr, object_pairs_hook=OrderedDict) + self.delayStatDicts['summary'].append(newDelayStatDict) + newSummaryDict = json.loads(self.entryDictJsonStr, object_pairs_hook=OrderedDict) + self.summaryDicts['summary'].append(newSummaryDict) + newDelayDict = json.loads(self.entryDictJsonStr, object_pairs_hook=OrderedDict) + self.delayDicts['summary'].append(newDelayDict) + self.delayStatDicts['summary'][self.totalDiskCnt]['diskname'] = disk + self.summaryDicts['summary'][self.totalDiskCnt]['diskname'] = disk + self.delayDicts['summary'][self.totalDiskCnt]['diskname'] = disk + self.totalDelayDicts.setdefault(disk, 0) + self.totalIosDicts.setdefault(disk, 0) + self.diskIdxDicts.setdefault(disk, self.totalDiskCnt) + self.totalDiskCnt += 1; + + def processLatencyDelays(self, sDict): + diskIdxDicts = self.diskIdxDicts + totalDelayDicts = self.totalDelayDicts + componentDicts = self.componentDicts + delayStatDicts = self.delayStatDicts + delayDicts = self.delayDicts + + disk = sDict['diskname'] + del sDict['diskname'] + totalDelayDicts[disk] += sDict['totaldelay'] + diskIdx = diskIdxDicts[disk] + delayDicts['summary'][diskIdx]['slow ios'].append(sDict) + for component,idx in componentDicts.items(): + delay = sDict['delays'][idx]['delay'] + if delay > delayStatDicts['summary'][diskIdx]['delays'][idx]['max']: + delayStatDicts['summary'][diskIdx]['delays'][idx]['max'] = delay + if delay < delayStatDicts['summary'][diskIdx]['delays'][idx]['min']: + delayStatDicts['summary'][diskIdx]['delays'][idx]['min'] = delay + delayStatDicts['summary'][diskIdx]['delays'][idx]['avg'] += delay + + def processLatencySummary(self, sDict): + diskIdxDicts = self.diskIdxDicts + summaryDicts = self.summaryDicts + + disk = sDict['diskname'] + diskIdx = diskIdxDicts[disk] + del sDict['diskname'] + listAbnormal=[i for i in sDict['abnormal'].split(' ') if i != '']; + delay=timeConvert(int(listAbnormal[-2].strip('(').split(':')[0]), False) + totalDelay=timeConvert(int(listAbnormal[-2].strip('(').split(':')[1]), True) + sDict['abnormal']=listAbnormal[0]+' '+listAbnormal[1]+" ("+delay+":"+totalDelay+")" + summaryDicts['summary'][diskIdx]['slow ios'].append(sDict) + + def processOneLatencySeq(self, sDict): + totalIosDicts = self.totalIosDicts + + disk = sDict['diskname'] + if disk not in totalIosDicts.keys(): + self.__newDiskDict(disk) + + totalIosDicts[disk] += 1 + if "abnormal" in sDict: + self.processLatencySummary(sDict) + else: + self.processLatencyDelays(sDict) + + def latencyCalculate(self): + diskIdxDicts = self.diskIdxDicts + totalIosDicts = self.totalIosDicts + totalDelayDicts = self.totalDelayDicts + componentDicts = self.componentDicts + delayStatDicts = self.delayStatDicts + summaryDicts = self.summaryDicts + delayDicts = self.delayDicts + + for disk, diskIdx in diskIdxDicts.items(): + totalIosDicts[disk] /= 2 + totalIos = totalIosDicts[disk] + maxPercent = 0 + avgTotalDelay = totalDelayDicts[disk] / totalIos + for component,idx in componentDicts.items(): + delayStatDicts['summary'][diskIdx]['delays'][idx]['avg'] /= totalIos + avgDelay = delayStatDicts['summary'][diskIdx]['delays'][idx]['avg'] + #percent = avgDelay * 100.0 / avgTotalDelay + percent = round((avgDelay * 100.0 / avgTotalDelay), 3) + if percent > maxPercent: + maxPercent = percent + delayStatDicts['summary'][diskIdx]['delays'][idx]['percent'] = str(percent)+"%" + + summaryDicts['summary'][diskIdx]['slow ios']=\ + sorted(summaryDicts['summary'][diskIdx]['slow ios'],\ + key=lambda e:float(re.split(':| ', e['abnormal'])[-2]),\ + reverse=True) + delayDicts['summary'][diskIdx]['slow ios']=\ + sorted(delayDicts['summary'][diskIdx]['slow ios'],\ + key=lambda e:e['totaldelay'],\ + reverse=True) + + def latencyPrint(self, threshold): + diskIdxDicts = self.diskIdxDicts + totalIosDicts = self.totalIosDicts + summaryDicts = self.summaryDicts + delayStatDicts = self.delayStatDicts + componentDicts = self.componentDicts + + for disk, diskIdx in diskIdxDicts.items(): + totalIos = totalIosDicts[disk] + print("\n%d IOs of disk %s over %d ms, delay distribution:" %(totalIos, disk, threshold)) + for component,idx in componentDicts.items(): + percent = delayStatDicts['summary'][diskIdx]['delays'][idx]['percent'] + print("%-12s delay: %s" %(component, percent)) + + end = totalIos if totalIos < 10 else 10 + print("The first %d IOs with the largest delay, more details:" % end) + print("seq".ljust(6)+"comm".ljust(20)+"pid".ljust(10)+"iotype".ljust(8)+\ + "datalen".ljust(16)+"abnormal(delay:totaldelay)".ljust(40)) + + for i in range(0,end): + eDict=summaryDicts['summary'][diskIdx]['slow ios'][i] + print(str(eDict["seq"]).ljust(6)+eDict["comm"].ljust(20)+\ + str(eDict["pid"]).ljust(10)+eDict["iotype"].ljust(8)+\ + str(eDict["datalen"]).ljust(16)+eDict["abnormal"].ljust(40)) + +def latencyDataAnalysis(resultSeqFile, threshold): + analysis = latencyAnalysis() + f = open(resultSeqFile) + for line in f.readlines(): + try: + sDict = json.loads(line, object_pairs_hook=OrderedDict) + except ValueError: + continue + analysis.processOneLatencySeq(sDict) + f.close() + if analysis.totalDiskCnt == 0: + print("\n0 IOs over %d ms, everything is ok !^o^ ~" % int(threshold)) + return + analysis.latencyCalculate() + + f = open(resultSeqFile.strip('.seq')+".stat", 'w+') + f.write(json.dumps(analysis.delayStatDicts)) + f.close() + + f = open(resultSeqFile.strip('.seq'), 'w+') + f.write(json.dumps(analysis.summaryDicts)) + f.close() + + f = open(resultSeqFile, 'w+') + f.write(json.dumps(analysis.delayDicts)) + f.close() + analysis.latencyPrint(int(threshold)) + print("more details see %s*" % resultSeqFile.strip('.seq')) + +def main(): + examples = """e.g. + ./iosdiag_data_analysis.py -L -s -t 1000 -f ./result.log.seq //Statistic IO delay diagnosis results + ./iosdiag_data_analysis.py -L -g -t 1000 -f ./result.log.seq //Display IO delay diagnostic results graphically + """ + parser = argparse.ArgumentParser( + description="Analyze IO diagnostic data.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) + parser.add_argument('-L','--latency', action='store_true', help='Analyze IO delay diagnostic data.') + parser.add_argument('-s','--stat', action='store_true', help='Statistic IO diagnosis results.') + parser.add_argument('-g','--graph', action='store_true', help='Display IO diagnostic results graphically.') + parser.add_argument('-t','--threshold', help='Specifies the threshold for the exception io.') + parser.add_argument('-f','--file', help='Specify the target data file to analyze.') + args = parser.parse_args() + + if args.latency: + latencyDataAnalysis(args.file, 1000 if args.threshold == None else args.threshold) + +if __name__ == "__main__": + main() + diff --git a/source/tools/detect/iosdiag/entry/Makefile b/source/tools/detect/iosdiag/entry/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f2f54ff8f09ebad8f032beca5c09e151782fd9b4 --- /dev/null +++ b/source/tools/detect/iosdiag/entry/Makefile @@ -0,0 +1,3 @@ +target = iosdiag + +include $(SRC)/mk/sh.mk diff --git a/source/tools/detect/iosdiag/entry/iosdiag.sh b/source/tools/detect/iosdiag/entry/iosdiag.sh new file mode 100755 index 0000000000000000000000000000000000000000..10e442c5fd049aeb9b63d3aa8b0a7b05be5e9c0b --- /dev/null +++ b/source/tools/detect/iosdiag/entry/iosdiag.sh @@ -0,0 +1,124 @@ +#!/bin/sh +#****************************************************************# +# ScriptName: iosdiag.sh +# Author: guangshui.lgs@alibaba-inc.com +# Create Date: 2021-07-02 11:44 +# Modify Author: $SHTERM_REAL_USER@alibaba-inc.com +# Modify Date: 2021-07-02 11:45 +# Function: +#***************************************************************# +if [ "$SYSAK_WORK_PATH" != "" ]; then +WORK_PATH=$SYSAK_WORK_PATH +else +WORK_PATH=/usr/local/sbin/.sysak_compoents +fi +TOOLS_PATH=$WORK_PATH/tools/`uname -r` +LIB_PATH=$WORK_PATH/lib/`uname -r` +latency_bin=$WORK_PATH/tools/latency +data_analysis=$WORK_PATH/tools/iosdiag_data_analysis +iosdiag_dir="/var/log/sysak/iosdiag" +logfile="$iosdiag_dir/$1/result.log.seq" +logfile_arg="-f $logfile" + +function usage() { + echo "Usage: sysak iosdiag [options] subcmd [cmdargs]]" + echo " subcmd:" + echo " latency, io latency diagnosis" + echo " cmdargs:" + echo " -h, help info" + echo " options:" + echo " -u url, transfer datafile to remote url" + echo " -s latency|[..], stop diagnosis" + #echo " -f logfile, output log file" +} + +function list() { + ls $TOOLS_PATH +} + +upload_data() { + datapath=$iosdiag_dir/$1 + cd $datapath + tar -zcf iosdiag_$1.tar.gz ./* + curl -i -q -X PUT -T iosdiag_$1.tar.gz $url + rm -f iosdiag_$1.tar.gz +} + +datafile_analysis() { + python $data_analysis --$1 --stat --file $logfile $threshold_arg +} + +enable_latency() { + if [ ! -e "$latency_bin" ]; then + echo "$latency_bin not found" + echo "iosdiag latency not support '$(uname -r)', please report to the developer" + exit -1 + fi + threshold=$(echo "$*"|awk -F "-t" '{print $2}'|awk '{print $1}') + [ "$threshold" != "" ] && { threshold_arg="-t $threshold"; } + { + flock -n 3 + [ $? -eq 1 ] && { echo "another latency is running."; exit -1; } + trap disable_latency SIGINT SIGTERM SIGQUIT + #mkdir -p `dirname $datafile` + chmod +x $latency_bin + rm $logfile_arg + $SYSAK_WORK_PATH/../sysak btf + $latency_bin $logfile_arg $* & + wait $! + disable_latency + } 3<> /tmp/latency.lock +} + +disable_latency() { + pid=`ps -ef | grep "\$latency_bin" | awk '{print $2}'` + if [ "$pid" != "" ] + then + kill -9 $pid 2>/dev/null + fi + + datafile_analysis latency + if [ -n "$url" ]; then + upload_data latency + fi + exit 0 +} + + +#execute command,every command need such args: +# -h/--help: command usage +# -f/--file: output files, default stdout +# output format jason +# -d/--disable +function execute() { + #echo cmd:$1 ${*:2} + enable_$1 ${*:2} +} + +while getopts 'hs:u:' OPT; do + case $OPT in + "u") + url=$OPTARG + ;; + "s") + diag_stop=true + subcmd=$OPTARG + ;; + *) + usage + exit 0 + ;; + esac +done + +if [ $diag_stop ]; then + echo "disable $subcmd" + disable_$subcmd + exit 0 +fi + +subcmd=${@:$OPTIND:1} +subargs=${*:$OPTIND+1}; +[ "$subcmd" != "latency" ] && { echo "not support subcmd $subcmd!!!"; usage; exit -1; } +execute $subcmd $subargs + diff --git a/source/tools/detect/iosdiag/latency/Makefile b/source/tools/detect/iosdiag/latency/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..7ad4403cafe09ddd5d6c102927c13a1aa4027795 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/Makefile @@ -0,0 +1,9 @@ +newdirs += . +csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c)) +bpfsrcs += $(wildcard *.bpf.c) + +CFLAGS += -static + +target = latency + +include $(SRC)/mk/bpf.mk diff --git a/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h new file mode 100644 index 0000000000000000000000000000000000000000..85663a524de10d58c4e00974ab9c86ac5db12b2d --- /dev/null +++ b/source/tools/detect/iosdiag/latency/bpf_iosdiag_common.h @@ -0,0 +1,203 @@ +#ifndef __BPF_IOSDIAG_COMMON_H +#define __BPF_IOSDIAG_COMMON_H + +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct bpf_map_def SEC("maps") iosdiag_maps = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct iosdiag_key), + .value_size = sizeof(struct iosdiag_req), + .max_entries = 2048, +}; + +struct bpf_map_def SEC("maps") iosdiag_maps_targetdevt = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1, +}; + +static inline int iosdiag_pkg_check(void *data, unsigned int len) +{ + return 1; +} + +static unsigned int get_target_devt(void) +{ + unsigned int key = 0; + unsigned int *devt; + + devt = (unsigned int *)bpf_map_lookup_elem(&iosdiag_maps_targetdevt, + &key); + if (devt) + return *devt; + return 0; +} + +static void +init_iosdiag_key(unsigned int dev, unsigned long sector, struct iosdiag_key *key) +{ + key->dev = dev; + key->sector = sector; +} + +static inline int +trace_io_driver_route(struct pt_regs *ctx, struct request *req, enum ioroute_type type) +{ + struct iosdiag_req *ioreq; + struct iosdiag_req new_ioreq = {0}; + struct iosdiag_key key = {0}; + unsigned long long now = bpf_ktime_get_ns(); + dev_t dev; + sector_t sector; + struct gendisk *rq_disk; + struct hd_struct *part; + struct device *device; + u32 target_devt = get_target_devt(); + + bpf_probe_read(&part, sizeof(struct hd_struct *), &req->part); + bpf_probe_read(&device, sizeof(struct device *), &part->__dev); + bpf_probe_read(&dev, sizeof(dev_t), &device->devt); + if (target_devt && dev != target_devt) + return 0; + + bpf_probe_read(§or, sizeof(sector_t), &req->__sector); + + init_iosdiag_key(dev, sector, &key); + ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); + if (ioreq) { + if (!ioreq->ts[type]) + ioreq->ts[type] = now; + if (ioreq->diskname[0] == '\0') { + bpf_probe_read(&rq_disk, sizeof(struct gendisk *), &req->rq_disk); + bpf_probe_read(ioreq->diskname, sizeof(ioreq->diskname), &rq_disk->disk_name); + } + if (type == IO_RESPONCE_DRIVER_POINT) + ioreq->cpu[1] = bpf_get_smp_processor_id(); + } else + return 0; + bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); + return 0; +} + +struct block_getrq_args { + struct trace_entry ent; + unsigned int dev; + unsigned long sector; + unsigned int nr_sector; + char rwbs[8]; + char comm[16]; +}; + +SEC("tracepoint/block/block_getrq") +static int tracepoint_block_getrq(struct block_getrq_args *args) +{ + struct iosdiag_req new_ioreq = {0}; + struct iosdiag_key key = {0}; + unsigned long long now = bpf_ktime_get_ns(); + pid_t pid = bpf_get_current_pid_tgid(); + u32 target_devt = get_target_devt(); + + if (target_devt && args->dev != target_devt) + return 0; + + new_ioreq.cpu[0] = new_ioreq.cpu[1] = new_ioreq.cpu[2] = -1; + init_iosdiag_key(args->dev, args->sector, &key); + if (pid) + memcpy(new_ioreq.comm, args->comm, sizeof(args->comm)); + new_ioreq.ts[IO_START_POINT] = now; + new_ioreq.pid = pid; + memcpy(new_ioreq.op, args->rwbs, sizeof(args->rwbs)); + new_ioreq.sector = args->sector; + new_ioreq.data_len = args->nr_sector * 512; + new_ioreq.cpu[0] = bpf_get_smp_processor_id(); + bpf_map_update_elem(&iosdiag_maps, &key, &new_ioreq, BPF_ANY); + return 0; +} + +struct block_rq_issue_args { + struct trace_entry ent; + unsigned int dev; + unsigned long sector; + unsigned int nr_sector; + unsigned int bytes; + char rwbs[8]; + char comm[16]; + char cmd[0]; +}; + +SEC("tracepoint/block/block_rq_issue") +static int tracepoint_block_rq_issue(struct block_rq_issue_args *args) +{ + struct iosdiag_req *ioreq; + struct iosdiag_key key = {0}; + unsigned long long now = bpf_ktime_get_ns(); + pid_t pid = bpf_get_current_pid_tgid(); + int type = IO_ISSUE_DRIVER_POINT; + u32 target_devt = get_target_devt(); + + if (target_devt && args->dev != target_devt) + return 0; + + init_iosdiag_key(args->dev, args->sector, &key); + ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); + if (ioreq) { + if (ioreq->ts[type]) + type = IO_ISSUE_DEVICE_POINT; + ioreq->ts[type] = now; + + if (args->bytes) + ioreq->data_len = args->bytes; + else if (args->nr_sector) + ioreq->data_len = args->nr_sector * 512; + } else + return 0; + bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); + return 0; +} + +struct block_rq_complete_args { + struct trace_entry ent; + dev_t dev; + sector_t sector; + unsigned int nr_sector; + int errors; + char rwbs[8]; + char cmd[0]; +}; + +SEC("tracepoint/block/block_rq_complete") +static int tracepoint_block_rq_complete(struct block_rq_complete_args *args) +{ + struct iosdiag_req *ioreq; + struct iosdiag_req new_ioreq = {0}; + struct iosdiag_key key = {0}; + unsigned long long now = bpf_ktime_get_ns(); + u32 target_devt = get_target_devt(); + + if (target_devt && args->dev != target_devt) + return 0; + + init_iosdiag_key(args->dev, args->sector, &key); + ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); + if (ioreq) { + if (!ioreq->ts[IO_COMPLETE_TIME_POINT]) + ioreq->ts[IO_COMPLETE_TIME_POINT] = now; + if (ioreq->ts[IO_ISSUE_DEVICE_POINT] && + ioreq->ts[IO_RESPONCE_DRIVER_POINT]) + ioreq->complete = 1; + ioreq->cpu[2] = bpf_get_smp_processor_id(); + } else + return 0; + if (ioreq->complete) + bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); + else + bpf_map_delete_elem(&iosdiag_maps, &key); + return 0; +} +#endif diff --git a/source/tools/detect/iosdiag/latency/collect.c b/source/tools/detect/iosdiag/latency/collect.c new file mode 100644 index 0000000000000000000000000000000000000000..48a26aed8fa7db9dd41081b97ca777b92593d011 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/collect.c @@ -0,0 +1,275 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ebpf_load.h" +#include "iosdiag.h" +#include "format_json.h" +#include + +#define min(x, y) ((x) > (y) ? (y) : (x)) + +#define DECLEAR_BPF_OBJ(name) \ + static struct name##_bpf *name; \ + static int name##_bpf_load; \ + +DECLEAR_BPF_OBJ(iosdiag_virtblk); +DECLEAR_BPF_OBJ(iosdiag_nvme); +DECLEAR_BPF_OBJ(iosdiag_scsi); +static int iosdiag_map; +static int iosdiag_virtblk_map; +static int iosdiag_maps_targetdevt; +static int g_stop; + +extern unsigned long get_threshold_us(void); +static int exec_shell_cmd(char *cmd) +{ + char buf[64]; + FILE *fp; + + if (!cmd) + return -1; + + if ((fp = popen(cmd, "r")) == NULL) { + fprintf(stderr, "exec \'%s\' fail\n", cmd); + return -1; + } + + while (fgets(buf, sizeof(buf) - 1, fp)); + pclose(fp); + return 0; +} + +static int over_threshold(struct iosdiag_req *iop) +{ + unsigned long threshold_ns = get_threshold_us() * 1000; + unsigned long delay_ns = iop->ts[IO_COMPLETE_TIME_POINT] - + iop->ts[IO_START_POINT]; + + if (threshold_ns && delay_ns >= threshold_ns) + return 1; + return 0; +} + +static void iosdiag_store_result(int fd) +{ + struct iosdiag_key key, next_key; + struct iosdiag_req iop; + unsigned long sleep_us = get_threshold_us() ? get_threshold_us() : 1000; + char *buf; + int i = 0; + unsigned int seq = 0; + + printf("running..."); + fflush(stdout); + buf = malloc(JSON_BUFFER_SIZE); + memset(buf, 0x0, JSON_BUFFER_SIZE); + while (!g_stop) { + if (bpf_map_get_next_key(iosdiag_map, &key, &next_key) == 0) { + bpf_map_lookup_elem(iosdiag_map, &next_key, &iop); + if (iop.complete) { + if (over_threshold(&iop)) { + seq++; + set_check_time_date(); + summary_convert_to_json(buf, &iop, seq); + delay_convert_to_json(buf + strlen(buf), &iop, seq); + write(fd, buf, strlen(buf)); + } + bpf_map_delete_elem(iosdiag_map, &next_key); + } + key = next_key; + if (i++ > 50) { + usleep(sleep_us); + i = 0; + } + } else + usleep(sleep_us); + } + free(buf); + printf("done\n"); +} + +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + //return vfprintf(stderr, format, args); + return 0; +} + +static void iosdiag_stop(int signo) +{ + //printf("iosdiag stop!\n"); + g_stop = 1; +} + +#define LOAD_IOSDIAG_BPF(name, load_map) \ +({ \ + __label__ out; \ + int __ret = 0; \ + printf("start %s load bpf\n", #name); \ + name = name##_bpf__open(); \ + if (!name) { \ + printf("load bpf error\n"); \ + printf("load %s bpf fail\n", #name); \ + __ret = -1; \ + goto out; \ + } \ + if (name##_bpf__load(name)) { \ + printf("load bpf prog error\n"); \ + printf("load %s bpf fail\n", #name); \ + name##_bpf__destroy(name); \ + __ret = -1; \ + goto out; \ + } \ + if (name##_bpf__attach(name)) { \ + printf("attach bpf prog error\n"); \ + printf("load %s bpf fail\n", #name); \ + name##_bpf__destroy(name); \ + __ret = -1; \ + goto out; \ + } \ + if (load_map) { \ + iosdiag_map = bpf_map__fd(name->maps.iosdiag_maps); \ + iosdiag_maps_targetdevt = bpf_map__fd(name->maps.iosdiag_maps_targetdevt); \ + } \ + if (!__ret) \ + printf("load %s bpf success\n", #name); \ + name##_bpf_load = 1; \ +out: \ + __ret; \ +}) + +static unsigned int get_devt_by_devname(char *devname) +{ + char sys_file[64]; + char cmd[128]; + char dev[16]; + FILE *fp; + int major, minor; + + sprintf(sys_file, "/sys/block/%s/dev", devname); + if (access(sys_file, F_OK)) + sprintf(sys_file, "/sys/block/*/%s/../dev", devname); + + sprintf(cmd, "cat %s 2>/dev/null", sys_file); + if ((fp = popen(cmd, "r")) == NULL) { + fprintf(stderr, "exec \'%s\' fail\n", cmd); + return 0; + } + + while (fgets(dev, sizeof(dev) - 1, fp)) { + if (sscanf(dev, "%d:%d", &major, &minor) != 2) { + pclose(fp); + return 0; + } + } + pclose(fp); + return ((major << 20) | minor); +} + +static char *get_module_name_by_devname(char *devname) +{ + char sys_file[64] = {0}; + char file_path[PATH_MAX] = {0}; + int ret; + + sprintf(sys_file, "/sys/class/block/%s", devname); + ret = readlink(sys_file, file_path, PATH_MAX); + if (ret < 0 || ret >= PATH_MAX) + return "none"; + if (strstr(file_path, "virtio")) + return "virtblk"; + else if (strstr(file_path, "nvme")) + return "nvme"; + else if (strstr(file_path, "target")) + return "scsi"; + return "none"; +} + +int iosdiag_init(char *devname) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int key = 0; + unsigned int target_devt = get_devt_by_devname(devname); + char *module_name = get_module_name_by_devname(devname); + + setrlimit(RLIMIT_MEMLOCK, &r); + + libbpf_set_print(libbpf_print_fn); + if (!strcmp(module_name, "virtblk")) { + if (LOAD_IOSDIAG_BPF(iosdiag_virtblk, 1)) + return -1; + } else if (!strcmp(module_name, "nvme")) { + if (LOAD_IOSDIAG_BPF(iosdiag_nvme, 1)) + return -1; + } else if (!strcmp(module_name, "scsi")) { + if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 1)) + return -1; + } else { + if (LOAD_IOSDIAG_BPF(iosdiag_virtblk, 1)) { + if (LOAD_IOSDIAG_BPF(iosdiag_nvme, 1)) { + if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 1)) + return -1; + } else { + LOAD_IOSDIAG_BPF(iosdiag_scsi, 0); + } + } else { + LOAD_IOSDIAG_BPF(iosdiag_nvme, 0); + LOAD_IOSDIAG_BPF(iosdiag_scsi, 0); + } + } + if (iosdiag_virtblk_bpf_load) + iosdiag_virtblk_map = + bpf_map__fd(iosdiag_virtblk->maps.iosdiag_virtblk_maps); + if (target_devt) + bpf_map_update_elem(iosdiag_maps_targetdevt, &key, &target_devt, BPF_ANY); + return 0; +} + +int iosdiag_run(int timeout, char *output_file) +{ + int fd_log; + char filepath[256]; + char cmd[272]; + + if (strlen(output_file) > (sizeof(filepath) - 1)) { + printf("error: output file name(%s) too large(max %lu bytes)\n", + output_file, sizeof(filepath)); + return -1; + } + strcpy(filepath, output_file); + sprintf(cmd, "mkdir %s -p", dirname(filepath)); + exec_shell_cmd(cmd); + fd_log = open(output_file, O_RDWR | O_CREAT, 0755); + if (fd_log < 0) { + printf("error: create output file \"%s\" fail\n", output_file); + return -1; + } + signal(SIGINT, iosdiag_stop); + signal(SIGALRM, iosdiag_stop); + if (timeout) + alarm(timeout); + iosdiag_store_result(fd_log); + close(fd_log); + return 0; +} + +void iosdiag_exit(char *module_name) +{ + if (iosdiag_virtblk_bpf_load) + iosdiag_virtblk_bpf__destroy(iosdiag_virtblk); + if (iosdiag_nvme_bpf_load) + iosdiag_nvme_bpf__destroy(iosdiag_nvme); + if (iosdiag_scsi_bpf_load) + iosdiag_scsi_bpf__destroy(iosdiag_scsi); +} + diff --git a/source/tools/detect/iosdiag/latency/ebpf_load.h b/source/tools/detect/iosdiag/latency/ebpf_load.h new file mode 100644 index 0000000000000000000000000000000000000000..8face9a55e9e6505ae07e7ad8d8440b237948c6d --- /dev/null +++ b/source/tools/detect/iosdiag/latency/ebpf_load.h @@ -0,0 +1,8 @@ +#ifndef _EBPF_LOAD_H +#define _EBPF_LOAD_H +#include +#include +#include "iosdiag_virtblk.skel.h" +#include "iosdiag_nvme.skel.h" +#include "iosdiag_scsi.skel.h" +#endif diff --git a/source/tools/detect/iosdiag/latency/format_json.c b/source/tools/detect/iosdiag/latency/format_json.c new file mode 100644 index 0000000000000000000000000000000000000000..a01a75289fd5c018646ecff281dbd6bdf9878725 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/format_json.c @@ -0,0 +1,230 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" +#include "format_json.h" +#include + +#define min(x, y) ((x) > (y) ? (y) : (x)) +struct ts_info { + char *str; + int idx; +}; + +struct ts_info g_points[] = { + {"start", IO_START_POINT}, + {"issue_driver", IO_ISSUE_DRIVER_POINT}, + {"issue_device", IO_ISSUE_DEVICE_POINT}, + {"device_complete", IO_RESPONCE_DRIVER_POINT}, + {"complete", IO_COMPLETE_TIME_POINT}, +}; + +struct ts_info g_delays[] = { + //{"total", IO_START_POINT}, + {"block", IO_ISSUE_DRIVER_POINT}, + {"driver", IO_ISSUE_DEVICE_POINT}, + {"disk", IO_RESPONCE_DRIVER_POINT}, + {"complete", IO_COMPLETE_TIME_POINT}, +}; + +static char *g_check_date; + +static char *point_idx_to_str(int idx) +{ + int i = 0; + + for (; i < (sizeof(g_points) / sizeof(g_points[0])); i++) { + if (g_points[i].idx == idx) + return g_points[i].str; + } + return NULL; +} + +static char *delay_idx_to_str(int idx) +{ + int i = 0; + + for (; i < (sizeof(g_delays) / sizeof(g_delays[0])); i++) { + if (g_delays[i].idx == idx) + return g_delays[i].str; + } + return NULL; +} + +void set_check_time_date(void) +{ + time_t t; + struct tm *date; + + t = time(NULL); + + date = localtime(&t); + g_check_date = asctime(date); + g_check_date[24] = '\0'; +} + +static char *get_check_time_date(void) +{ + return g_check_date; +} + +static unsigned long get_total_delay(struct iosdiag_req *iop) +{ + return iop->ts[MAX_POINT - 1] / 1000 - iop->ts[IO_START_POINT] / 1000; +} + +static unsigned long get_max_delay(struct iosdiag_req *iop) +{ + int i; + unsigned long delay; + unsigned long max_delay = 0; + + for (i = IO_START_POINT + 1; i < MAX_POINT; i++) { + delay = iop->ts[i] / 1000 - iop->ts[i - 1] / 1000; + if (max_delay < delay) + max_delay = delay; + } + return max_delay; +} + +static char *get_max_delay_component(struct iosdiag_req *iop) +{ + int i, idx = 0; + unsigned long delay; + unsigned long max_delay = 0; + + for (i = IO_START_POINT + 1; i < MAX_POINT; i++) { + delay = iop->ts[i] / 1000 - iop->ts[i - 1] / 1000; + if (max_delay < delay) { + max_delay = delay; + idx = i; + } + } + return idx != 0 ? delay_idx_to_str(idx) : ""; +} + +static int is_disk_delay(struct iosdiag_req *iop) +{ + if (strcmp(get_max_delay_component(iop), "disk")) + return 0; + return 1; +} + +void point_convert_to_json(void *dest, void *src, unsigned int seq) +{ + int i; + struct iosdiag_req *iop = src; + + sprintf(dest, + "{\"seq\":\"%u\"," + "\"diskname\":\"%s\"," + "\"points\":[", seq, iop->diskname); + for (i = 0; i < MAX_POINT; i++) { + if (!iop->ts[i]) + continue; + sprintf(dest + strlen(dest), + "{\"point\":\"%s\",\"ts\":%llu}", + point_idx_to_str(i), (iop->ts[i] / 1000)); + if (i != (MAX_POINT - 1)) + sprintf(dest + strlen(dest), "%s", ","); + } + sprintf(dest + strlen(dest), "%s", "]}\n"); +} + +void delay_convert_to_json(void *dest, void *src, unsigned int seq) +{ + int i, n; + int skip = 0; + unsigned long delay; + struct iosdiag_req *iop = src; + + sprintf(dest, + "{\"seq\":\"%u\"," + "\"diskname\":\"%s\",", + seq, + iop->diskname); + for (i = 0, n = 0; i < MAX_POINT; i++) { + if (i == IO_START_POINT) { + delay = iop->ts[MAX_POINT - 1] / 1000 - + iop->ts[IO_START_POINT] / 1000; + sprintf(dest + strlen(dest), + "\"totaldelay\":%lu," + "\"delays\":[", + delay); + continue; + } else { + if (!skip) + n = i - 1; + if (iop->ts[i] > iop->ts[n]) { + delay = iop->ts[i] / 1000 - iop->ts[n] / 1000; + skip = 0; + } else { + skip = 1; + continue; + } + } + sprintf(dest + strlen(dest), + "{\"component\":\"%s\",\"delay\":%lu}", + delay_idx_to_str(i), delay); + if (i != (MAX_POINT - 1)) + sprintf(dest + strlen(dest), "%s", ","); + } + sprintf(dest + strlen(dest), "%s", "]}\n"); +} + +void summary_convert_to_json(void *dest, void *src, unsigned int seq) +{ + char cpu[24] = {0}; + char component[16] = {0}; + struct iosdiag_req *iop = src; + char *maxdelay_component = get_max_delay_component(iop); + unsigned long max_delay = get_max_delay(iop); + unsigned long total_delay = get_total_delay(iop); + + if (!is_disk_delay(iop)) { + sprintf(component, "os(%s)", maxdelay_component); + maxdelay_component = component; + } + + if (iop->cpu[0] == iop->cpu[1] && iop->cpu[1] == iop->cpu[2]) + sprintf(cpu, "%d", iop->cpu[0]); + else + sprintf(cpu, "%d -> %d -> %d", + iop->cpu[0], iop->cpu[1], iop->cpu[2]); + //blk_rq_op_name(iop->cmd_flags, buf, sizeof(buf)); + sprintf(dest, + "{\"seq\":\"%u\"," + "\"time\":\"%s\"," + "\"abnormal\":\"%s delay (%lu:%lu us)\"," + "\"diskname\":\"%s\"," + "\"iotype\":\"%s\"," + "\"sector\":%lu," + "\"datalen\":%u," + "\"comm\":\"%s\"," + "\"pid\":%d," + "\"cpu\":\"%s\"}\n", + seq, + get_check_time_date(), + maxdelay_component, + max_delay, + total_delay, + iop->diskname, + iop->op, + iop->sector, + iop->data_len, + iop->comm, + iop->pid, + cpu); +} + diff --git a/source/tools/detect/iosdiag/latency/format_json.h b/source/tools/detect/iosdiag/latency/format_json.h new file mode 100644 index 0000000000000000000000000000000000000000..63de88df212bd2e0f5f308fceb062569bef368e6 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/format_json.h @@ -0,0 +1,10 @@ +#ifndef _FORMAT_JSON_H +#define _FORMAT_JSON_H + +#define JSON_BUFFER_SIZE 4096 +void set_check_time_date(void); +void summary_convert_to_json(void *dest, void *src, unsigned int seq); +void delay_convert_to_json(void *dest, void *src, unsigned int seq); +void point_convert_to_json(void *dest, void *src, unsigned int seq); +#endif + diff --git a/source/tools/detect/iosdiag/latency/iosdiag.h b/source/tools/detect/iosdiag/latency/iosdiag.h new file mode 100644 index 0000000000000000000000000000000000000000..cada8db4309e94f2f0d1a6685ddbb9b65880d4a0 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/iosdiag.h @@ -0,0 +1,47 @@ +#ifndef __IOSDIAG__ +#define __IOSDIAG__ + +#define COUNT (10) +#define BPF_ANY (0) +#define IOSDIAG_PKG_MAGIC 0x494F5049 + +#define REQ_OP_BITS 8 +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define MAX_STACK_DEPTH 12 + +enum ioroute_type{ + IO_START_POINT, + IO_ISSUE_DRIVER_POINT, + IO_ISSUE_DEVICE_POINT, + IO_RESPONCE_DRIVER_POINT, + IO_COMPLETE_TIME_POINT, + MAX_POINT, +}; + +struct iosdiag_req { + pid_t pid; + char comm[16]; + char diskname[32]; + unsigned long long ts[MAX_POINT]; + unsigned int cpu[3]; + unsigned int complete; + //unsigned int cmd_flags; + char op[8]; + unsigned int data_len; + unsigned long sector; +}; + +struct iosdiag_key { +#if 0 + int cpu; + unsigned long long start_time_ns; +// unsigned long long io_start_time_ns; +#endif + unsigned int dev; + unsigned long sector; +}; + +int iosdiag_init(char *module_name); +int iosdiag_run(int timeout, char *output_file); +void iosdiag_exit(char *module_name); +#endif diff --git a/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c b/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..4bb344b5ba7a9cae4e556940815292a58e8b79b2 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/iosdiag_nvme.bpf.c @@ -0,0 +1,35 @@ +#include "bpf_iosdiag_common.h" + +SEC("kprobe/nvme_queue_rq") +int kprobe_nvme_queue_rq(struct pt_regs *ctx) +{ + struct blk_mq_queue_data *bd = + (struct blk_mq_queue_data *)PT_REGS_PARM2(ctx); + bool kick; + struct request *req; + + bpf_probe_read(&kick, sizeof(bool), &bd->last); + if (!kick) + return 0; + + bpf_probe_read(&req, sizeof(struct request *), &bd->rq); + if (!req) { + bpf_printk("kprobe_nvme_queue_rq: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_ISSUE_DRIVER_POINT); +} + +SEC("kprobe/blk_mq_complete_request") +int kprobe_blk_mq_complete_request(struct pt_regs *ctx) +{ + struct request *req = (struct request *)PT_REGS_PARM1(ctx); + + if (!req) { + bpf_printk("kprobe_blk_mq_complete_request: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); +} +char _license[] SEC("license") = "GPL"; + diff --git a/source/tools/detect/iosdiag/latency/iosdiag_scsi.bpf.c b/source/tools/detect/iosdiag/latency/iosdiag_scsi.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..e136c4344fe33f14dc1cca567a77626bfe69c145 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/iosdiag_scsi.bpf.c @@ -0,0 +1,31 @@ +#include "bpf_iosdiag_common.h" + +SEC("kprobe/scsi_dispatch_cmd") +int kprobe_scsi_dispatch_cmd(struct pt_regs *ctx) +{ + struct scsi_cmnd *cmd = (struct scsi_cmnd *)PT_REGS_PARM1(ctx); + struct request *req; + + bpf_probe_read(&req, sizeof(struct request *), &cmd->request); + if (!req) { + bpf_printk("kprobe_scsi_dispatch_cmd: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_ISSUE_DEVICE_POINT); +} + +SEC("kprobe/scsi_done") +int kprobe_scsi_done(struct pt_regs *ctx) +{ + struct scsi_cmnd *cmd = (struct scsi_cmnd *)PT_REGS_PARM1(ctx); + struct request *req; + + bpf_probe_read(&req, sizeof(struct request *), &cmd->request); + if (!req) { + bpf_printk("kprobe_scsi_done: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); +} +char _license[] SEC("license") = "GPL"; + diff --git a/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c b/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..1f801e14b45085d3392bb9dfd029c4ba98860754 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/iosdiag_virtblk.bpf.c @@ -0,0 +1,61 @@ +#include "bpf_iosdiag_common.h" + +struct bpf_map_def SEC("maps") iosdiag_virtblk_maps = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(pid_t), + .value_size = sizeof(unsigned long), + .max_entries = 20480, +}; + +SEC("kprobe/virtio_queue_rq") +int kprobe_virtio_queue_rq(struct pt_regs *ctx) +{ + struct blk_mq_queue_data *bd = + (struct blk_mq_queue_data *)PT_REGS_PARM2(ctx); + bool kick; + unsigned long req_addr; + pid_t pid = bpf_get_current_pid_tgid(); + + bpf_probe_read(&kick, sizeof(bool), &bd->last); + if (!kick) + return 0; + + bpf_probe_read(&req_addr, sizeof(struct request *), &bd->rq); + if (!req_addr) { + bpf_printk("kprobe_virtio_queue_rq: con't get request"); + return 0; + } + bpf_map_update_elem(&iosdiag_virtblk_maps, &pid, &req_addr, BPF_ANY); + return 0; +} + +SEC("kretprobe/virtio_queue_rq") +int kretprobe_virtio_queue_rq(struct pt_regs *ctx) +{ + int ret = PT_REGS_RC(ctx); + unsigned long *req_addr; + pid_t pid = bpf_get_current_pid_tgid(); + + if (!ret) { + req_addr = bpf_map_lookup_elem(&iosdiag_virtblk_maps, &pid); + if (!req_addr || !(*req_addr)) + return 0; + trace_io_driver_route(ctx, (struct request *)*req_addr, IO_ISSUE_DEVICE_POINT); + } + bpf_map_delete_elem(&iosdiag_virtblk_maps, &pid); + return 0; +} + +SEC("kprobe/blk_mq_complete_request") +int kprobe_blk_mq_complete_request(struct pt_regs *ctx) +{ + struct request *req = (struct request *)PT_REGS_PARM1(ctx); + + if (!req) { + bpf_printk("kprobe_blk_mq_complete_request: con't get request"); + return 0; + } + return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); +} +char _license[] SEC("license") = "GPL"; + diff --git a/source/tools/detect/iosdiag/latency/main.c b/source/tools/detect/iosdiag/latency/main.c new file mode 100644 index 0000000000000000000000000000000000000000..9ea70d9f663e1eca04eadf4967f5c1a603665ea6 --- /dev/null +++ b/source/tools/detect/iosdiag/latency/main.c @@ -0,0 +1,72 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +static void usage(void) +{ + fprintf(stdout, + "\nUsage: \n" + "latency [OPTION] disk_devname Detect IO latency in specified disk\n" + "latency -t ms disk_devname Set IO latency threshold(default 1000ms)\n" + "latency -T sec disk_devname How long to detect IO latency(default 10s)\n" + "latency -f log disk_devname Specify the output file log\n" + "\ne.g.\n" + "latency vda Detect IO latency in disk \"vda\"\n" + "latency -t 10 vda Set IO latency threshold 10ms and detect IO latency in disk \"vda\"\n" + "latency -t 10 -T 30 vda Detect IO latency in disk \"vda\" 30 secs\n"); + exit(-1); +} + +static unsigned long g_threshold_us; +unsigned long get_threshold_us(void) +{ + return g_threshold_us; +} + +int main(int argc, char *argv[]) +{ + int ch; + int timeout_s = 10, threshold_ms = 1000; + char *result_file = "/var/log/sysak/iosdiag/latency/result.log"; + char *devname; + + while ((ch = getopt(argc, argv, "T:t:f:h")) != -1) { + switch (ch) { + case 'T': + timeout_s = (unsigned int)strtoul(optarg, NULL, 0); + break; + case 't': + threshold_ms = (int)strtoul(optarg, NULL, 0); + break; + case 'f': + result_file = optarg; + break; + case 'h': + default: + usage(); + } + } + devname = argv[argc - 1]; + g_threshold_us = threshold_ms * 1000; + if (iosdiag_init(devname)) { + fprintf(stderr, "iosdiag_init fail\n"); + return -1; + } + iosdiag_run(timeout_s, result_file); + iosdiag_exit(devname); + return 0; +} +