diff --git a/rpm/sysak-build-nodep.sh b/rpm/sysak-build-nodep.sh index 89ac80c9933db3e1aa828fdffbe30455412191ae..9afe21a5d1c7ea0b275a7125428acb1674a8a6e3 100755 --- a/rpm/sysak-build-nodep.sh +++ b/rpm/sysak-build-nodep.sh @@ -38,11 +38,13 @@ fi %install mkdir -p \$RPM_BUILD_ROOT/usr/bin +mkdir -p \$RPM_BUILD_ROOT/etc/sysak mkdir -p \$RPM_BUILD_ROOT/usr/local/sysak/log mkdir -p \$RPM_BUILD_ROOT/usr/lib/systemd/system/ /bin/cp -rf $BUILD_DIR/.sysak_components \$RPM_BUILD_ROOT/usr/local/sysak/.sysak_components /bin/cp -rf $BUILD_DIR/sysak \$RPM_BUILD_ROOT/usr/bin/ /bin/cp -f $BUILD_DIR/.sysak_components/tools/monitor/sysakmon.conf \$RPM_BUILD_ROOT/usr/local/sysak +/bin/cp -f $BUILD_DIR/.sysak_components/tools/dist/app/collector/plugin.yaml \$RPM_BUILD_ROOT/etc/sysak/ /bin/cp $SOURCE_DIR/rpm/sysak.service \$RPM_BUILD_ROOT/usr/lib/systemd/system/ /bin/cp $SOURCE_DIR/rpm/sysak_server.conf \$RPM_BUILD_ROOT/usr/local/sysak/ @@ -58,6 +60,7 @@ fi rm -rf /usr/local/sysak %files +/etc/sysak /usr/local/sysak /usr/bin/sysak /usr/lib/systemd/system/sysak.service @@ -83,7 +86,7 @@ main() { export LINUX_VERSION=$(uname -r) - TARGET_LIST="--enable-target-all --enable-static --disable-target-rtrace --disable-target-PingTrace" + TARGET_LIST="--enable-target-all" build_rpm } diff --git a/source/lib/internal/ebpf/coolbpf b/source/lib/internal/ebpf/coolbpf index 0562b1397b8a8997b16d752d874dc5ad74149149..8db5ed3383f9c4521b33514c9b03d31581d65e6c 160000 --- a/source/lib/internal/ebpf/coolbpf +++ b/source/lib/internal/ebpf/coolbpf @@ -1 +1 @@ -Subproject commit 0562b1397b8a8997b16d752d874dc5ad74149149 +Subproject commit 8db5ed3383f9c4521b33514c9b03d31581d65e6c diff --git a/source/tools/monitor/ioMonitor/Makefile b/source/tools/monitor/ioMonitor/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..1345670e82b6c09a9ae9dccd7af1ae9d1c7dfaea --- /dev/null +++ b/source/tools/monitor/ioMonitor/Makefile @@ -0,0 +1,4 @@ +mods = ioMon +target := ioMonitor + +include $(SRC)/mk/sh.mk diff --git a/source/tools/monitor/ioMonitor/README.md b/source/tools/monitor/ioMonitor/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4856ff901df85de226f1ff34314d68dfd311d54f --- /dev/null +++ b/source/tools/monitor/ioMonitor/README.md @@ -0,0 +1,2 @@ +# 功能说明 +监控服务主程序,收集系统监控指标,支持查看历史监控数据 diff --git a/source/tools/monitor/ioMonitor/ioMon/__init__.py b/source/tools/monitor/ioMonitor/ioMon/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cb8e4b62acc8012add0dc0175a04b6b4f51940ec --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +if __name__ == "__main__": + pass diff --git a/source/tools/monitor/ioMonitor/ioMon/displayClass.py b/source/tools/monitor/ioMonitor/ioMon/displayClass.py new file mode 100755 index 0000000000000000000000000000000000000000..cc51352ecf36ffa2b488a22b5e1c94961e544d00 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/displayClass.py @@ -0,0 +1,510 @@ +# -*- coding: utf-8 -*- + +import os +import sys +import string +import time +import re +import json +import threading +from collections import OrderedDict +from nfPut import CnfPut + + +def bwToValue(bw): + units = ["B", "KB", "MB", "GB", "TB", "PB"] + if str(bw) == '0': + return 0 + for i in range(5, -1, -1): + if units[i] in bw: + return float(bw.split(units[i])[0]) * pow(1024, i) + + +def humConvert(value): + units = ["B", "KB", "MB", "GB", "TB", "PB"] + size = 1024.0 + + if value == 0: + return value + for i in 
range(len(units)): + if (value / size) < 1: + return "%.1f%s/s" % (value, units[i]) + value = value / size + + +def iolatencyResultReport(*argvs): + result = [] + nf = argvs[1] + nfPutPrefix = str(argvs[2]) + statusReportDicts = argvs[3] + ioburst = False + nfPrefix = [] + iolatStartT = statusReportDicts['iolatency']['startT'] + iolatEndT = statusReportDicts['iolatency']['endT'] + ioutilStartT = statusReportDicts['ioutil']['startT'] + ioutilEndT = statusReportDicts['ioutil']['endT'] + lastIOburstT = statusReportDicts['iolatency']['lastIOburstT'] + + # If IO burst occurs in the short term(first 180 secs or next 60secs) or + # during IO delay diagnosis, it should be considered as one of + # the delay factors + if iolatStartT - lastIOburstT < 300 or \ + (ioutilStartT >= (iolatStartT - 300) and ioutilEndT <= (iolatEndT + 60)): + statusReportDicts['iolatency']['lastIOburstT'] = iolatStartT + ioburst = True + + os.system('ls -rtd '+argvs[0]+'/../* | head -n -5 | '\ + 'xargs --no-run-if-empty rm {} -rf') + if os.path.exists(argvs[0]+'/result.log.stat'): + with open(argvs[0]+'/result.log.stat') as logF: + data = logF.readline() + else: + return + try: + stat = json.loads(data, object_pairs_hook=OrderedDict) + except Exception: + return + + for ds in stat['summary']: + delays = sorted(ds['delays'], + key=lambda e: (float(e['percent'].strip('%'))), + reverse=True) + maxDelayComp = delays[0]['component'] + maxDelayPercent = float(delays[0]['percent'].strip('%')) + avgLat = format(sum([d['avg'] for d in delays])/1000.0, '.3f') + diagret = 'diagret=\"IO delay(AVG %sms) detected in %s\"' % ( + avgLat, str(ds['diskname'])) + nfPrefix.append(',diag_type=IO-Delay,devname='+str(ds['diskname'])) + + if ioburst and {maxDelayComp, delays[1]['component']}.issubset( + ['disk', 'os(block)']): + if (delays[0]['avg'] / delays[1]['avg']) < 10: + suggest = 'solution=\"reduce IO pressure. 
Refer to the '\ + 'diagnosis of IO-Burst and optimize some tasks\"' + diskIdx = 0 + if maxDelayComp == 'os(block)': + diskIdx = 1 + reason = ( + 'reason=\"IO burst occurs, too mang IO backlogs'\ + '(disk avg/max lat:%s/%s ms, lat percent:%s,'\ + ' OS dispatch avg/max lat:%s/%s ms, lat percent:%s\"' % + (str(delays[diskIdx]['avg'] / 1000.000), + str(delays[diskIdx]['max'] / 1000.000), + str(delays[diskIdx]['percent']), + str(delays[1 - diskIdx]['avg'] / 1000.000), + str(delays[1 - diskIdx]['max'] / 1000.000), + str(delays[1 - diskIdx]['percent']))) + result.append(diagret+','+reason+','+suggest) + continue + else: + statusReportDicts['iolatency']['lastIOburstT'] = lastIOburstT + + suggest = 'solution=\"Please ask the OS kernel expert\"' + maxDelayLog = 'avg/max lat:%s/%s ms, lat percent:%s' %( + str(delays[0]['avg']/1000.000), + str(delays[0]['max']/1000.000), + str(delays[0]['percent'])) + if maxDelayComp == 'disk': + reason = ( + 'reason=\"Disk delay(processing IO slowly, %s)\"' %(maxDelayLog)) + suggest = 'solution=\"Please confirm whether the disk is normal\"' + elif maxDelayComp == 'os(block)': + if delays[1]['component'] == 'disk' and \ + float(delays[1]['percent'].strip('%')) > 20: + with open(argvs[0]+'/resultCons.log') as logF: + data = filter(lambda l : 'F' in l, logF.readlines()) + flushIO = False + if len(data) > 0: + for d in data: + if 'F' in d.split()[-6]: + flushIO = True + break + if flushIO: + suggest = ( + 'Disable flush IO dispatch(echo \"write through\" > '\ + '/sys/class/block/%s/queue/write_cache;'\ + 'echo 0 > /sys/class/block/%s/queue/fua)}' % ( + str(ds['diskname']), str(ds['diskname']))) + suggest += '; Notes: Flush IO is a special instruction to '\ + 'ensure that data is stored persistently on the disk '\ + 'in time, and not saved in the internal cache of the disk.'\ + ' Before disabling, please confirm with the disk FAE '\ + '\"Whether it is necessary to rely on the software to issue'\ + ' flush instructions to ensure data persistent storage\",'\ + ' And avoid data loss due to crash or disk power down' + suggest = 'solution=\"'+suggest+'\"' + else: + suggest = 'solution=\"Please confirm whether the disk is normal\"' + reason = ( + 'reason=\"Disk delay(processing %s slowly, avg/max lat:'\ + '%s/%s ms, lat percent:%s)\"' %( + 'Flush IO' if flushIO else 'IO', + str(delays[1]['avg']/1000.000), + str(delays[1]['max']/1000.000), + str(delays[1]['percent']))) + result.append(diagret+','+reason+','+suggest) + continue + reason = ( + 'reason=\"OS delay(Issuing IO slowly at os(block), %s)\"' %( + maxDelayLog)) + else: + reason = ( + 'reason=\"OS delay(processing IO slowly at %s, %s)\"' %( + str(maxDelayComp), maxDelayLog)) + result.append(diagret+','+reason+','+suggest) + + for e, p in zip(result, nfPrefix): + # print(e+'\n') + #nf.put(nfPutPrefix, p+' '+e) + nf.puts(nfPutPrefix+p+' '+e) + statusReportDicts['iolatency']['valid'] = True + + +def iohangResultReport(*argvs): + abnormalDicts={} + firstioDicts={} + result=[] + nf=argvs[1] + nfPutPrefix=str(argvs[2]) + statusReportDicts = argvs[3] + nfPrefix=[] + + os.system('ls -rtd '+argvs[0]+'/../* | head -n -5 |'\ + ' xargs --no-run-if-empty rm {} -rf') + if os.path.exists(argvs[0]+'/result.log'): + with open(argvs[0]+'/result.log') as logF: + data=logF.readline() + else: + return + try: + stat=json.loads(data, object_pairs_hook = OrderedDict) + except Exception: + return + + for ds in stat['summary']: + maxDelay = 0 + hungIO = None + if ds['diskname'] not in abnormalDicts.keys(): + abnormalDicts.setdefault(ds['diskname'], 
{}) + firstioDicts.setdefault( + ds['diskname'], + {'time':0, 'iotype':0, 'sector':0}) + for hi in ds['hung ios']: + key=hi['abnormal'].split('hang')[0] + delay = float(hi['abnormal'].split('hang')[1].split()[0]) + if delay > maxDelay: + maxDelay = delay + hungIO = hi + if key not in abnormalDicts[ds['diskname']].keys(): + abnormalDicts[ds['diskname']].setdefault(key, 0) + abnormalDicts[ds['diskname']][key] += 1 + t = hungIO['time'].split('.')[0] + tStamp = float(time.mktime(time.strptime(t,'%Y-%m-%d %H:%M:%S'))) + tStamp -= maxDelay + firstioDicts[ds['diskname']]['time'] = \ + time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(tStamp+8*3600)) + firstioDicts[ds['diskname']]['iotype'] = hungIO['iotype'] + firstioDicts[ds['diskname']]['sector'] = hungIO['sector'] + for diskname, val in abnormalDicts.items(): + abnormalDicts[diskname] = OrderedDict( + sorted(val.items(), key=lambda e: e[1], reverse=True)) + + with open(argvs[0]+'/result.log.stat') as logF: + data = logF.readline() + try: + stat = json.loads(data, object_pairs_hook=OrderedDict) + except Exception: + return + + for ds in stat['summary']: + hungIOS = sorted(ds['hung ios'], + key = lambda e: (float(e['percent'].strip('%'))), + reverse = True) + maxDelayComp=hungIOS[0]['component'] + maxDelayPercent=float(hungIOS[0]['percent'].strip('%')) + maxDelay=format(hungIOS[0]['max']/1000.0, '.3f') + diagret='diagret=\"IO hang %sms detected in %s' % ( + maxDelay, ds['diskname'])+'\"' + nfPrefix.append(',diag_type=IO-Hang,devname='+str(ds['diskname'])) + for key in abnormalDicts[ds['diskname']].keys(): + if maxDelayComp in key: + detail = str( + ''.join(re.findall(re.compile(r'[(](.*?)[)]', re.S), key))) + break + reason = ('reason=\"%s hang(%s, avg/max delay:%s/%s ms), first hang['\ + 'time:%s, iotype:%s, sector:%d]\"' %( + maxDelayComp, detail, + str(hungIOS[0]['avg']/1000.000), + str(hungIOS[0]['max']/1000.000), + firstioDicts[ds['diskname']]['time'], + firstioDicts[ds['diskname']]['iotype'], + firstioDicts[ds['diskname']]['sector'])) + if maxDelayComp == 'Disk' or maxDelayComp == 'OS': + suggest = 'solution=\"Please confirm whether the disk is normal\"' + if maxDelayComp == 'OS': + suggest = 'solution=\"Please ask the OS kernel expert\"' + result.append(diagret+','+reason+','+suggest) + + for e, p in zip(result, nfPrefix): + nf.puts(nfPutPrefix+p+' '+e) + #nf.put(nfPutPrefix, p+' '+e) + statusReportDicts['iohang']['valid'] = True + + +def ioutilDataParse(data, resultInfo): + tUnit = None + totalBw = totalIops = 0 + for ds in data['mstats']: + iops = ds['iops_rd'] + ds['iops_wr'] + bps = bwToValue(ds['bps_wr']) + bwToValue(ds['bps_rd']) + totalBw += bps + totalIops += iops + key = ds['comm']+':'+ds['pid']+':'+ds['cid'][0:20]+':'+ds['device'] + if not tUnit: + if ds['bps_wr'] != '0': + tUnit = ds['bps_wr'].split('/')[1] + else: + tUnit = ds['bps_rd'].split('/')[1] + if key not in resultInfo.keys(): + resultInfo.setdefault(key, + {'disk':ds['device'], 'maxIops':0, 'maxBps':0, 'file':ds['file']}) + resultInfo[key]['maxBps'] = max(bps, resultInfo[key]['maxBps']) + resultInfo[key]['maxIops'] = max(iops, resultInfo[key]['maxIops']) + if resultInfo[key]['maxBps'] != bps or resultInfo[key]['maxIops'] != iops: + resultInfo[key]['file'] = ds['file'] + if 'bufferio' in resultInfo.keys(): + del resultInfo[key]['bufferio'] + if 'bufferio' in ds.keys() and 'bufferio' not in resultInfo[key].keys(): + resultInfo[key].setdefault('bufferio', ds['bufferio']) + return totalIops,totalBw,tUnit + + +def ioutilReport(nf, nfPutPrefix, resultInfo, tUnit, diagret): 
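+    # Rank tasks by peak per-device bandwidth, skip minor contributors
+    # (below 50 iops or 5 MB/s), and emit a single IO-Burst record
+    # (diagret/reason/solution) through nf.puts(); kworker entries are
+    # expanded with the buffer-IO writers recorded under 'bufferio'.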
+ top = 1 + suggestPS = reason = '' + resultInfo = \ + sorted(resultInfo.items(), key=lambda e: e[1]['maxBps'], reverse=True) + for key, val in resultInfo: + if val['maxIops'] < 50 or val['maxBps'] < 1024 * 1024 * 5: + continue + file = ', target file:'+str(val['file']) if val['file'] != '-' else '' + if 'kworker' in str(key): + kTasklist = [] + if 'bufferio' in val.keys(): + for i in val["bufferio"]: + if 'KB' in i["Wrbw"]: + continue + kTasklist.append(i['task']) + file += ('%s Wrbw %s disk %s file %s;' % + (i['task'], i["Wrbw"], i["device"], i["file"])) + if len(kTasklist): + file = '(Write bio from: '+file+')' + if top == 1: + suggestPS = '(Found \'kworker\' flush dirty pages, Try to reduce'\ + ' the buffer-IO write?%s or check the config /proc/sys/vm/'\ + '{dirty_ratio,dirty_background_ratio} too small?)' %( + '('+';'.join(kTasklist)+')' if len(kTasklist) else '') + maxBps = humConvert(val['maxBps']).replace('s', tUnit) + reason += ('%d. task[%s], access disk %s with iops:%s, bps:%s%s; ' %( + top, str(key.rsplit(':',1)[0]), str(val['disk']), + str(val['maxIops']), maxBps, file)) + if top == 1 and suggestPS == '': + suggestPS = '(Found task \'%s\')' %(str(key.rsplit(':',1)[0])) + top += 1 + suggest = \ + 'Optimize the tasks that contributes the most IO flow%s' % suggestPS + putIdx = ',diag_type=IO-Burst ' + putField = 'diagret=\"%s\",reason=\"%s\",solution=\"%s\"' %( + diagret, reason, suggest) + #nf.put(nfPutPrefix, + if reason != '': + nf.puts(nfPutPrefix+putIdx+putField) + # print(prefix+reason+suggest+'\n') + + +def ioutilResultReport(*argvs): + resultInfo= {} + nf= argvs[1] + nfPutPrefix= str(argvs[2]) + statusReportDicts = argvs[3] + totalBw = 0 + maxIops = maxBw = 0 + minIops = minBw = sys.maxsize + tUnit = None + + os.system('ls -rtd '+os.path.dirname(argvs[0])+'/../* | head -n -5 |'\ + ' xargs --no-run-if-empty rm {} -rf') + if os.path.exists(argvs[0]): + with open(argvs[0]) as logF: + dataList = logF.readlines() + else: + return + for data in dataList: + try: + stat = json.loads(data, object_pairs_hook =OrderedDict) + except Exception: + return + iops,bw,tUnit = ioutilDataParse(stat, resultInfo) + maxIops = max(maxIops, iops) + minIops = min(minIops, iops) + maxBw = max(maxBw, bw) + minBw = min(minBw, bw) + totalBw += bw + if totalBw < 1024 * 1024 * 10: + return + + if resultInfo: + content = 'Iops:'+str(minIops)+'~'+str(maxIops)+\ + ', Bps:'+humConvert(minBw).replace('s', tUnit)+\ + '~'+humConvert(maxBw).replace('s', tUnit) + diagret = 'IO-Burst('+content+') detected' + ioutilReport(nf, nfPutPrefix, resultInfo, tUnit, diagret) + statusReportDicts['ioutil']['valid'] = True + + +def iowaitDataParse(data, resultInfo): + unkownDisable = False + for io in data['iowait']: + if 'many dirty' in io['reason'] or 'queue full' in io['reason']: + unkownDisable = True + if 'Unkown' in io['reason'] and unkownDisable == True: + continue + key = io['comm']+':'+io['tgid']+':'+io['pid'] + if key not in resultInfo.keys(): + resultInfo.setdefault( + key, {'timeout': 0, 'maxIowait': 0, 'reason': ''}) + if float(io['iowait']) > float(resultInfo[key]['maxIowait']): + resultInfo[key]['maxIowait'] = io['iowait'] + resultInfo[key]['timeout'] = io['timeout'] + resultInfo[key]['reason'] = io['reason'] + return data['global iowait'],unkownDisable + + +def iowaitReport(nf, nfPutPrefix, unkownDisable, resultInfo, diagret): + top = 0 + reason = '' + resDicts = { + 'Too many dirty pages':False, + 'Device queue full':False, + 'Ioscheduler queue full':False} + + for key, val in resultInfo.items(): + if 
unkownDisable == True and 'Unkown' in val['reason']: + del resultInfo[key] + + resultInfo = OrderedDict( + sorted(resultInfo.items(), key=lambda e: float(e[1]['maxIowait']), + reverse=True)[:3]) + for key, val in resultInfo.items(): + if unkownDisable == True: + resDicts[val['reason']] = True + top += 1 + reason += ( + '%d. task[%s], wait %sms, contribute iowait %s due to \'%s\'; ' %( + top, str(key), str(val['timeout']), str(val['maxIowait'])+'%', + str(val['reason']))) + + if unkownDisable == True: + if resDicts['Too many dirty pages'] == True: + suggest = 'Reduce io-write pressure or Adjust /proc/sys/vm/'\ + '{dirty_ratio,dirty_bytes} larger carefully' + else: + if resDicts['Device queue full'] and resDicts['Ioscheduler queue full']: + suggest = \ + 'Device queue full -> Disk busy due to disk queue full, '\ + 'Please reduce io pressure;'\ + 'Ioscheduler queue full -> Io scheduler busy due to '\ + 'scheduler queue full, '\ + 'Please reduce io pressure or Adjust '\ + '/sys/block//queue/nr_requests larger carefully' + elif resDicts['Device queue full']: + suggest = 'Disk busy due to disk queue full, '\ + 'Please reduce io pressure' + elif resDicts['Ioscheduler queue full']: + suggest = 'Io scheduler busy due to scheduler queue full, '\ + 'Please reduce io pressure or Adjust '\ + '/sys/block//queue/nr_requests larger carefully' + else: + suggest = 'Report stacktrace to OS kernel specialist' + + putIdx = ',diag_type=IOwait-high ' + putField = 'diagret=\"%s\",reason=\"%s\",solution=\"%s\"' %( + diagret, reason, suggest) + #nf.put(nfPutPrefix, + nf.puts(nfPutPrefix+putIdx+putField) + + +def iowaitResultReport(*argvs): + resultInfo = {} + nf = argvs[1] + nfPutPrefix = str(argvs[2]) + statusReportDicts = argvs[3] + maxGiowait = 0 + minGiowait = sys.maxsize + unkownDisable = None + + os.system('ls -rtd '+os.path.dirname(argvs[0])+'/../* | head -n -5 |'\ + ' xargs --no-run-if-empty rm {} -rf') + if os.path.exists(argvs[0]): + with open(argvs[0]) as logF: + dataList = logF.readlines() + else: + return + + for data in dataList: + try: + stat = json.loads(data, object_pairs_hook=OrderedDict) + except Exception: + return + gIowait,disable = iowaitDataParse(stat, resultInfo) + if not unkownDisable: + unkownDisable = disable + maxGiowait = max(maxGiowait, gIowait) + minGiowait = min(minGiowait, gIowait) + + if resultInfo: + content = str(minGiowait)+'%~'+str(maxGiowait)+'%' + diagret = 'IOwait high('+content+') detected' + iowaitReport(nf, nfPutPrefix, unkownDisable, resultInfo, diagret) + statusReportDicts['iowait']['valid'] = True + # print(diagret+reason+solution+'\n') + + +class displayClass(object): + def __init__(self, sender): + self.funcResultReportDicts = { + 'iohang': iohangResultReport, + 'ioutil': ioutilResultReport, + 'iolatency': iolatencyResultReport, + 'iowait': iowaitResultReport} + self.statusReportDicts = { + 'iohang': {'startT': 0, 'endT': 0, 'valid': False}, + 'ioutil': {'startT': 0, 'endT': 0, 'valid': False, + 'iopsThresh': 0, 'bpsThresh': 0}, + 'iolatency': {'startT': 0, 'endT': 0, 'valid': False, + 'lastIOburstT': 0}, + 'iowait': {'startT': 0, 'endT': 0, 'valid': False}, + } + self._sender = sender + self._nfPutPrefix = 'IOMonDiagLog' + + def markIoburst(self, now): + self.statusReportDicts['iolatency']['lastIOburstT'] = now + + def setIoburstThresh(self, iopsThresh, bpsThresh): + self.statusReportDicts['ioutil']['iopsThresh'] = iopsThresh + self.statusReportDicts['ioutil']['bpsThresh'] = bpsThresh + + def diagnoseValid(self, diagType): + return 
self.statusReportDicts[diagType]['valid'] + + def start(self, timeout, diagType, filepath, startTime, endTime): + self.statusReportDicts[diagType]['startT'] = startTime + self.statusReportDicts[diagType]['endT'] = endTime + self.statusReportDicts[diagType]['valid'] = False + argvs = [ + filepath, self._sender, self._nfPutPrefix, self.statusReportDicts] + timer = threading.Timer(timeout, + self.funcResultReportDicts[diagType], + argvs) + timer.start() diff --git a/source/tools/monitor/ioMonitor/ioMon/exceptCheckClass.py b/source/tools/monitor/ioMonitor/ioMon/exceptCheckClass.py new file mode 100755 index 0000000000000000000000000000000000000000..23b2a9ec13fd4b3872677b05098132d376520f2b --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/exceptCheckClass.py @@ -0,0 +1,187 @@ +# -*- coding: utf-8 -*- + +import sys +import string + + +class exceptCheckClass(): + def __init__(self, window): + self.window = int(window) if window is not None else 100 + self._exceptChkDicts = {} + + def addItem(self, key): + exceptChkItem = { + 'baseThresh': { + 'nrSample': 0, + 'moveWinData': [], + 'curWinMinVal': sys.maxsize, + 'curWinMaxVal': 0, + 'moveAvg': 0, + 'thresh': 0}, + 'compensation': { + 'thresh': 0, + 'shouldUpdThreshComp': True, + 'decRangeThreshAvg': 0, + 'decRangeCnt': 0, + 'minStableThresh': sys.maxsize, + 'maxStableThresh': 0, + 'stableThreshAvg': 0, + 'nrStableThreshSample': 0}, + 'dynTresh': sys.maxsize, + 'usedWin': 0} + self._exceptChkDicts.setdefault(key, exceptChkItem) + + # The sliding window calculates the basic threshold, through which the spikes + # and burrs in the IO indicators can be screened. The calculation idea is as + # follows: + # 1. take 100 data as a group for calculation (calculate 1 ~ 100 data for the + # first time, 2 ~ 101 for the second time, 3 ~ 102 for the third time, and + # so on), and calculate the average value mavg of 100 data in the current + # window + # 2. obtain the maximum value Max and minimum value min of 100 data, then record + # the thresh (MAX((max-mavg),(mavg-min))) each time, and calculate the average + # value(threshavg) of all thresh at this time each time, taking threshavg as + # the basic threshold for this time + # 3. 
The next basic threshold follows steps 1, 2, and so on + def _calcBaseThresh(self, key, e): + exceptChkDict = self._exceptChkDicts[key] + bt = exceptChkDict['baseThresh'] + thresh = None + + bt['nrSample'] += 1 + if bt['nrSample'] >= self.window: + if len(bt['moveWinData']) < self.window: + bt['moveWinData'].append(e) + else: + bt['moveWinData'][exceptChkDict['usedWin'] % self.window] = e + moveAvg = float( + format(sum(bt['moveWinData']) / float(self.window), '.1f')) + + # Find the min and max values of this window so far + maxVal = max(bt['curWinMaxVal'], e) + minVal = min(bt['curWinMinVal'], e) + nrThreshSample = bt['nrSample'] + 1 - self.window + thresh = float( + format(max(maxVal - moveAvg, moveAvg - minVal), '.1f')) + # Calculate base threshold + threshAvg = float(format( + (bt['thresh'] * (nrThreshSample - 1) + thresh) / nrThreshSample, + '.3f')) + bt['thresh'] = threshAvg + bt['moveAvg'] = moveAvg + bt['curWinMaxVal'] = maxVal + bt['curWinMinVal'] = minVal + + exceptChkDict['usedWin'] += 1 + if exceptChkDict['usedWin'] >= self.window: + # the next window, set min and Max to 0 + bt['curWinMaxVal'] = 0 + bt['curWinMinVal'] = sys.maxsize + exceptChkDict['usedWin'] = 0 + else: + # Here, only the first window will enter to ensure that + # the data in one window is accumulated + bt['moveWinData'].append(e) + bt['curWinMaxVal'] = max(bt['curWinMaxVal'], e) + bt['curWinMinVal'] = min(bt['curWinMinVal'], e) + exceptChkDict['usedWin'] += 1 + return thresh + + # Called by _calcCompThresh to calculate the compensation value + # under normal steady state + def _calcStableThresh(self, ct, curBaseThresh, curThresh): + # Discard points exceeding (base-threshold / 10) + avg = ct['decRangeThreshAvg'] + if (curThresh - avg) < ((curBaseThresh - avg) / 10.0): + tSum = ct['stableThreshAvg'] * \ + ct['nrStableThreshSample'] + curThresh + ct['nrStableThreshSample'] += 1 + ct['stableThreshAvg'] = tSum / ct['nrStableThreshSample'] + ct['minStableThresh'] = min(ct['minStableThresh'], curThresh) + ct['maxStableThresh'] = max(ct['maxStableThresh'], curThresh) + # 1.5 windows of stable data have been counted, + # which can be used as normal threshold compensation value + if ct['nrStableThreshSample'] >= (self.window * 1.5): + ct['thresh'] = \ + max(ct['stableThreshAvg'] - ct['minStableThresh'], + ct['maxStableThresh'] - ct['stableThreshAvg']) + ct['shouldUpdThreshComp'] = False + ct['minStableThresh'] = sys.maxsize + ct['maxStableThresh'] = 0 + ct['stableThreshAvg'] = ct['decRangeThreshAvg'] = 0 + ct['nrStableThreshSample'] = ct['decRangeCnt'] = 0 + + # Calculate the threshold compensation value and superimpose this value + # on the basic threshold to eliminate false alarms + def _calcCompThresh(self, key, lastBaseThresh, curThresh): + exceptChkDict = self._exceptChkDicts[key] + curBaseThresh = exceptChkDict['baseThresh']['thresh'] + ct = exceptChkDict['compensation'] + + # It is not confirmed whether the current state is constant + # (constant state is defined as IO index fluctuation, which is stable) + # 1. the max basic threshold of this window is the compensation value + # 2. 
enter a new window to reset to the current basic threshold + if ct['shouldUpdThreshComp'] == True and \ + (ct['thresh'] < curBaseThresh or exceptChkDict['usedWin'] == 0): + ct['thresh'] = curBaseThresh + + # Continuous monotonic decreasing, constant steady state, + # constant compensation threshold inferred + if curBaseThresh < lastBaseThresh: + tSum = ct['decRangeThreshAvg'] * ct['decRangeCnt'] + curThresh + ct['decRangeCnt'] += 1 + ct['decRangeThreshAvg'] = tSum / ct['decRangeCnt'] + # The monotonic decline has continued for 1.5 windows, + # indicating that IO pressure may return to normality + if ct['decRangeCnt'] >= (self.window * 1.5): + self._calcStableThresh(ct, curBaseThresh, curThresh) + else: + # As long as the basic threshold curve is not + # continuously monotonically decreasing, + # reset to 0 and make statistics again + ct['minStableThresh'] = sys.maxsize + ct['maxStableThresh'] = 0 + ct['stableThreshAvg'] = ct['decRangeThreshAvg'] = 0 + ct['nrStableThreshSample'] = ct['decRangeCnt'] = 0 + + # Update the dynamic threshold of the corresponding indicator type + # and call it after collecting the IO indicators. The key is await, + # util, IOPs, BPS, etc + def updateDynThresh(self, key, e): + exceptChkDict = self._exceptChkDicts[key] + bt = exceptChkDict['baseThresh'] + ct = exceptChkDict['compensation'] + lastBaseThresh = bt['thresh'] + + curThresh = self._calcBaseThresh(key, e) + if curThresh is not None: + self._calcCompThresh(key, lastBaseThresh, curThresh) + exceptChkDict['dynTresh'] = \ + bt['thresh'] + bt['moveAvg'] + ct['thresh'] + + # Turn off the threshold compensation of the corresponding indicators. + # Generally, when it is detected that the IO util exceeds 20%, + # it will be disabled according to the situation of each indicator + def disableThreshComp(self, key): + exceptChkDict = self._exceptChkDicts[key] + ct = exceptChkDict['compensation'] + bt = exceptChkDict['baseThresh'] + + #if exceptChkDict['dynTresh'] == sys.maxsize: + # return + + if ct['shouldUpdThreshComp'] == True: + ct['shouldUpdThreshComp'] = False + exceptChkDict['dynTresh'] = bt['thresh'] + bt['moveAvg'] + ct['thresh'] = 0.000001 + + + def getNrDataSample(self, key): + return self._exceptChkDicts[key]['baseThresh']['nrSample'] + + # Get the dynamic threshold of the corresponding indicator type, + # call it after collecting the IO indicators, and judge whether + # the indicators are abnormal. 
The key is await, util, IOPs, BPS, etc + def getDynThresh(self, key): + return self._exceptChkDicts[key]['dynTresh'] diff --git a/source/tools/monitor/ioMonitor/ioMon/exceptDiagnoseClass.py b/source/tools/monitor/ioMonitor/ioMon/exceptDiagnoseClass.py new file mode 100755 index 0000000000000000000000000000000000000000..7a0d5f74c6873a18c2b489c6086ed3c4b76a1bc8 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/exceptDiagnoseClass.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- + +import os +import string +import time +from collections import OrderedDict +import threading +from tools.iofstool.iofsstat import iofsstatStart +from tools.iowaitstat.iowaitstat import iowaitstatStart +from displayClass import displayClass + + +class runDiag(object): + def __init__(self, logRootPath, sender): + self.funcDicts = { + 'iohang': self.startIohangDiagnose, + 'ioutil': self.startIoutilDiagnose, + 'iolatency': self.startIolatencyDiagnose, + 'iowait': self.startIowaitDiagnose} + self.lastDiagTimeDicts = \ + {'iohang': 0, 'ioutil': 0, 'iolatency': 0, 'iowait': 0} + self.display = displayClass(sender) + self.sysakPath = 'sysak' + self.logRootPath = logRootPath + + + def _recentDiagnoseValid(self, diagType): + return self.display.diagnoseValid(diagType) + + + def startIohangDiagnose(self, *argv): + devname = argv[0] + now = time.time() + if now - self.lastDiagTimeDicts['iohang'] <= 60: + return + startTime = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(now)) + logdir = self.logRootPath+'/iosdiag/hangdetect/'+startTime + outlog = logdir+'/resultCons.log' + if not os.path.exists(logdir): + try: + os.makedirs(logdir) + except Exception: + return + self.lastDiagTimeDicts['iohang'] = now + if devname is not None: + os.system(self.sysakPath+' -g iosdiag hangdetect -o -t 3000 -T 10 -f '+ + logdir+' '+devname+' > '+outlog+' &') + else: + os.system(self.sysakPath+' -g iosdiag hangdetect -o -t 3000 -T 10 -f '+ + logdir+' > '+outlog+' &') + self.display.start(20, 'iohang', logdir, now, now+60) + + + def startIolatencyDiagnose(self, *argv): + devname = argv[0] + thresh = argv[1] + ioburst = argv[2] + now = time.time() + if now - self.lastDiagTimeDicts['iolatency'] <= 60: + return + startTime = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(now)) + logdir = self.logRootPath+'/iosdiag/latency/'+startTime + outlog = logdir+'/resultCons.log' + if not os.path.exists(logdir): + try: + os.makedirs(logdir) + except Exception: + return + self.lastDiagTimeDicts['iolatency'] = now + if devname is not None: + os.system(self.sysakPath+' -g iosdiag latency -t '+str(thresh) + + ' -T 45 -f '+logdir+' '+devname+' > '+outlog+' &') + else: + os.system(self.sysakPath+' -g iosdiag latency -t '+str(thresh) + + ' -T 45 -f '+logdir+' > '+outlog+' &') + if ioburst: + self.display.markIoburst(now) + self.display.start(60, 'iolatency', logdir, now, now+60) + + + def startIoutilDiagnose(self, *argv): + devname = argv[0] + bwThresh = argv[1] + iopsThresh = argv[2] + now = time.time() + if now - self.lastDiagTimeDicts['ioutil'] <= 60: + return + startTime = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(now)) + logdir = self.logRootPath+'/iosdiag/iofsstat/'+startTime + outlog = logdir+'/resultCons.log' + if not os.path.exists(logdir): + try: + os.makedirs(logdir) + except Exception: + return + self.lastDiagTimeDicts['ioutil'] = now + #self.display.setIoburstThresh(iopsThresh, bwThresh) + argvs = ['-j',outlog,'-n','-m','-c','1','-t','5','-T','40', + '-i',str(iopsThresh),'-b',str(bwThresh)] + threading.Thread(target=iofsstatStart, 
args=(argvs,)).start() + self.display.start(55, 'ioutil', outlog, now, now+60) + + + def startIowaitDiagnose(self, *argv): + iowaitThresh = argv[0] + now = time.time() + if now - self.lastDiagTimeDicts['iowait'] <= 60: + return + startTime = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(now)) + logdir = self.logRootPath+'/iosdiag/iowaitstat/'+startTime + outlog = logdir+'/resultCons.log' + if not os.path.exists(logdir): + try: + os.makedirs(logdir) + except Exception: + return + self.lastDiagTimeDicts['iowait'] = now + argvs = ['-j', outlog, '-t', '5', '-w', str(iowaitThresh), '-T', '45'] + threading.Thread(target=iowaitstatStart, args=(argvs,)).start() + self.display.start(55, 'iowait', outlog, now, now+60) + + + def runDiagnose(self, diagType, argv): + self.funcDicts[diagType](*list(argv)) + + +class diagnoseClass(runDiag): + def __init__(self, window, logRootPath, sender): + super(diagnoseClass, self).__init__(logRootPath, sender) + self.window = window + self.diagnoseDicts = OrderedDict() + self._diagStat = OrderedDict( + {'iohang': {'run': False, 'argv': [0, 0, 0, 0, 0, 0, 0, 0]}, + 'ioutil': {'run': False, 'argv': [0, 0, 0, 0, 0, 0, 0, 0]}, + 'iowait': {'run': False, 'argv': [0, 0, 0, 0, 0, 0, 0, 0]}, + 'iolatency': {'run': False, 'argv': [0, 0, 0, 0, 0, 0, 0, 0]}}) + + + def addItem(self, devname, key, reportInterval, triggerInterval): + diagRecord = { + 'statWindow': self.window, + 'trigger': False, + 'lastReport': 0, + 'reportInterval': reportInterval, + 'reportCnt': 0, + 'lastDiag': 0, + 'triggerInterval': triggerInterval, + 'diagArgs': [0, 0, 0, 0, 0, 0, 0, 0]} + if devname not in self.diagnoseDicts.keys(): + self.diagnoseDicts.setdefault(devname, {key: diagRecord}) + else: + self.diagnoseDicts[devname].setdefault(key, diagRecord) + + + def setUpDiagnose(self, devname, key, nrSample, *argv): + diagnoseDicts = self.diagnoseDicts[devname][key] + lastDiag = diagnoseDicts['lastDiag'] + lastReport = diagnoseDicts['lastReport'] + statWindow = diagnoseDicts['statWindow'] + reportInterval = diagnoseDicts['reportInterval'] + triggerInterval = diagnoseDicts['triggerInterval'] + + if reportInterval != 0: + if lastReport == 0 or (nrSample-lastReport) > statWindow: + diagnoseDicts['lastReport'] = nrSample + diagnoseDicts['reportCnt'] = 1 + else: + diagnoseDicts['reportCnt'] += 1 + if diagnoseDicts['reportCnt'] > reportInterval: + if lastDiag == 0 or (nrSample-lastDiag) > triggerInterval: + diagnoseDicts['trigger'] = True + diagnoseDicts['reportCnt'] = 0 + diagnoseDicts['lastDiag'] = nrSample + else: + diagnoseDicts['lastReport'] = nrSample + diagnoseDicts['reportCnt'] = 0 + elif triggerInterval != 0: + if lastDiag == 0 or (nrSample-lastDiag) >= triggerInterval: + diagnoseDicts['lastDiag'] = nrSample + diagnoseDicts['trigger'] = True + else: + diagnoseDicts['trigger'] = True + + for idx, val in enumerate(argv): + diagnoseDicts['diagArgs'][idx] = val + + + def isException(self, devname, key): + diagnoseDicts = self.diagnoseDicts[devname][key] + reportInterval = diagnoseDicts['reportInterval'] + triggerInterval = diagnoseDicts['triggerInterval'] + + if reportInterval != 0: + if (diagnoseDicts['reportCnt'] + 1) >= reportInterval: + return True + elif triggerInterval != 0: + return True + else: + return True + return False + + + def clearDiagStat(self): + for diagType, stat in self._diagStat.items(): + stat['run'] = False + stat['argv'][0:] = [0, 0, 0, 0, 0, 0, 0, 0] + + + def checkDiagnose(self): + diagnoseDicts = self.diagnoseDicts + diagInfo = {'iohang': [], 'iolatency': [], 'ioutil': []} 
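+        # diagInfo collects the devices whose triggers fired this round, per
+        # diagnosis type; _diagStat is then filled with run flags and argv
+        # (device name, or None for all devices, plus thresholds) before
+        # runDiagnose() is called.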
+ diagStat = self._diagStat + ioburst = False + + for devname, diagDict in diagnoseDicts.items(): + if devname == 'system': + if diagDict['iowait']['trigger'] == True: + diagStat['iowait']['run'] = True + diagStat['iowait']['argv'][0] = \ + diagDict['iowait']['diagArgs'][0] + diagDict['iowait']['trigger'] = False + continue + + for diagType in ['iohang', 'iolatency', 'ioutil']: + if diagDict[diagType]['trigger'] == True: + if diagType == 'iolatency': + ioburst = diagDict['iolatency']['diagArgs'][1] + diagInfo[diagType].append(devname) + diagDict[diagType]['trigger'] = False + + for diagType, value in diagInfo.items(): + diagStat[diagType]['run'] = True + if len(value) > 1: + diagStat[diagType]['argv'][0] = None + elif len(value) == 1: + diagStat[diagType]['argv'][0] = value[0] + else: + diagStat[diagType]['run'] = False + + if diagStat['ioutil']['run'] == True: + for idx in [1,2]: + val = sorted( + [diagnoseDicts[dev]['ioutil']['diagArgs'][idx-1] + for dev in diagInfo['ioutil']], + reverse=True) + diagStat['ioutil']['argv'][idx] = val[-1] + + if diagStat['iolatency']['run'] == True: + diagStat['iolatency']['argv'][1] = sorted( + [diagnoseDicts[dev]['iolatency']['diagArgs'][0] + for dev in diagInfo['iolatency']], + reverse=True)[-1] + diagStat['iolatency']['argv'][2] = ioburst + + for diagType, stat in diagStat.items(): + if stat['run'] == True: + self.runDiagnose(diagType, stat['argv']) + stat['run'] = False + + + # In displayClass, after the diagnostic log is reported to the remote end, + # it will be marked as a valid diagnosis, and In exceptDiagnoseClass, + # clear the valid mark before each diagnosis + def recentDiagnoseValid(self, diagType): + return self._recentDiagnoseValid(diagType) diff --git a/source/tools/monitor/ioMonitor/ioMon/ioMonCfgClass.py b/source/tools/monitor/ioMonitor/ioMon/ioMonCfgClass.py new file mode 100755 index 0000000000000000000000000000000000000000..ffc13f293073751d9e0f185c42cd01939275a995 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/ioMonCfgClass.py @@ -0,0 +1,137 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import sys +import signal +import string +import time +import json +from collections import OrderedDict + +globalCfgDicts = {} +globalCfgPath = '' +def loadCfg(cfgPath): + with open(cfgPath) as f: + data = f.read() + return json.loads(data) + + +def loadCfgHander(signum, frame): + global globalCfgDicts + globalCfgDicts = loadCfg(globalCfgPath) + + +class ioMonCfgClass(object): + def __init__(self, cfgArg, resetCfg, logRootPath): + global globalCfgPath + self.cfgPath = logRootPath+'/ioMon/ioMonCfg.json' + globalCfgPath = self.cfgPath + cfg = self._paserCfg(cfgArg) + hasArgs = any(list(cfg.values())) + if not os.path.exists(self.cfgPath) or resetCfg: + cfg['iowait'] = int(cfg['iowait']) if cfg['iowait'] else 5 + cfg['await'] = int(cfg['await']) if cfg['await'] else 10 + cfg['util'] = int(cfg['util']) if cfg['util'] else 20 + cfg['iops'] = int(cfg['iops']) if cfg['iops'] else 150 + cfg['bps'] = int(cfg['bps']) if cfg['bps'] else 31457280 + cfg['cycle'] = int(cfg['cycle']) if cfg['cycle'] else 1000 + cfg['diagIowait'] = cfg['diagIowait'] if cfg['diagIowait'] else 'on' + cfg['diagIoburst'] = cfg['diagIoburst'] if cfg['diagIoburst'] else 'on' + cfg['diagIolat'] = cfg['diagIolat'] if cfg['diagIolat'] else 'on' + cfg['diagIohang'] = cfg['diagIohang'] if cfg['diagIohang'] else 'off' + self._updateCfg(cfg) + return + else: + self._loadCfg() + if hasArgs: + self._updateCfg(cfg) + + + def _paserCfg(self, cfgArg): + cfgDicts = { + 
'iowait':None, 'await':None, 'util':None, 'iops':None, 'bps':None, + 'cycle':None, 'diagIowait':None, 'diagIoburst':None, + 'diagIolat':None, 'diagIohang':None} + try: + cfgList = \ + cfgArg.split(',') if cfgArg is not None and len(cfgArg) > 0 else [] + for cfg in cfgList: + errstr = None + c = cfg.split('=') + if c[0] not in cfgDicts.keys() or len(c[1]) == 0: + errstr = "bad cfg item: %s, must be in %s" %( + cfg, str(cfgDicts.keys())) + elif 'diag' not in c[0] and not c[1].isdigit(): + errstr = "monitor cfg argv must be digit: %s." %cfg + elif 'diag' in c[0] and c[1] not in ['on', 'off']: + errstr = \ + "diagnose cfg argv must be [\'on\', \'off\']: %s." %cfg + if errstr: + print(errstr) + sys.exit(0) + cfgDicts[c[0]] = c[1] + except Exception: + print "bad cfg: %s." %cfg + sys.exit(0) + return cfgDicts + + + def _setGlobalCfgDicts(self, CfgDicts): + global globalCfgDicts + globalCfgDicts = CfgDicts + + + def _getGlobalCfgDicts(self): + global globalCfgDicts + return globalCfgDicts + + + def _updateCfg(self, cfgDicts): + oldCfg = {} + if not os.path.exists(self.cfgPath): + if not os.path.exists(os.path.dirname(self.cfgPath)): + os.mkdir(os.path.dirname(self.cfgPath)) + else: + oldCfg = loadCfg(self.cfgPath) + f = open(self.cfgPath, 'w+') + newCfg = json.loads(json.dumps(cfgDicts)) + if oldCfg: + for key,val in newCfg.items(): + if val is not None: + oldCfg[key] = val + newCfg = oldCfg + s = json.dumps(newCfg, indent=4) + f.write(s) + f.close() + self._setGlobalCfgDicts(newCfg) + + + def _loadCfg(self): + self._setGlobalCfgDicts(loadCfg(self.cfgPath)) + + + def createCfgFlagFile(self): + f = open(os.path.dirname(self.cfgPath)+'/.ioMonCfgFlag', 'w+') + f.write(str(os.getpid())) + f.close() + signal.signal(signal.SIGUSR2, loadCfgHander) + + + def notifyIoMon(self): + try: + with open(os.path.dirname(self.cfgPath)+'/.ioMonCfgFlag') as f: + pid = f.read() + with open('/proc/'+str(pid)+'/cmdline') as f: + cmdline = f.read().strip() + except Exception: + sys.exit(0) + if 'ioMonitorMain' in cmdline: + os.system('kill -USR2 '+str(pid)) + + + def getCfgItem(self, key): + val = str(self._getGlobalCfgDicts()[key]) + if val.isdigit(): + val = int(val) + return val diff --git a/source/tools/monitor/ioMonitor/ioMon/ioMonitorClass.py b/source/tools/monitor/ioMonitor/ioMon/ioMonitorClass.py new file mode 100755 index 0000000000000000000000000000000000000000..2a9dec0cd56e16e4bb2243e96f171e17c15b821b --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/ioMonitorClass.py @@ -0,0 +1,372 @@ +# -*- coding: utf-8 -*- + +import os +import sys +import signal +import string +import argparse +import time +from exceptDiagnoseClass import diagnoseClass +from exceptCheckClass import exceptCheckClass +from ioMonCfgClass import ioMonCfgClass +from collections import OrderedDict +from nfPut import CnfPut + +class ioMonitorClass(object): + def __init__(self, logRootPath, cfg, pipeFile): + self.window = 60 + self.cfg = cfg + self.cfg.createCfgFlagFile() + self.diagSwitch = { + 'diagIowait': {'sw': self.cfg.getCfgItem('diagIowait'), + 'esi':'IOwait-High'}, + 'diagIoburst': {'sw': self.cfg.getCfgItem('diagIoburst'), + 'esi':'IO-Delay'}, + 'diagIolat': {'sw': self.cfg.getCfgItem('diagIolat'), + 'esi':'IO-Burst'}, + 'diagIohang': {'sw': self.cfg.getCfgItem('diagIohang'), + 'esi':'IO-Hang'} + } + self._sender = CnfPut(pipeFile) + self._nfPutTlb = 'IOMonIndForDisksIO' + self._nfPutTlb4System = 'IOMonIndForSystemIO' + self.fieldDicts = OrderedDict() + self.exceptChkDicts = {'system': exceptCheckClass(self.window)} + 
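+        # 'system' holds the global iowait checker; per-disk checkers for
+        # await/util/iops/bps are added later by _addMonitorAttrForDisk().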
self.exceptChkDicts['system'].addItem('iowait') + self.diagnose = diagnoseClass(self.window, logRootPath, self._sender) + self.diagnose.addItem('system', 'iowait', 0, 60) + self.fDiskStats = open("/proc/diskstats") + self.cpuStatIowait = {'sum': 0, 'iowait': 0} + self.uploadInter = 0 + self.exceptionStat = {'system': {'IOwait-High': {'cur':0,'max':0}}} + self.dataStat = {'system': {'iowait': 0}} + + + def _addMonitorAttrForDisk(self, disk): + dataStat = self.dataStat + exceptChkDicts = self.exceptChkDicts + diagnose = self.diagnose + exceptionStat = self.exceptionStat + + # used for reporting per-index to database + dataStat.setdefault( + disk, {'await': 0, 'util': 0, 'iops': 0, 'bps': 0, 'qusize': 0}) + + # add exception-check attr for per-index on per-disk + exceptChkDicts.setdefault(disk, exceptCheckClass(self.window)) + for key in ['util', 'await', 'iops', 'bps']: + exceptChkDicts[disk].addItem(key) + + # add diagnose attr for per-index on per-disk + diagnoseDict = { + 'iohang': {'triggerInterval': self.window * 5, + 'reportInterval': 10}, + 'ioutil': {'triggerInterval': 60, 'reportInterval': 0}, + 'iolatency': {'triggerInterval': 60, 'reportInterval': 0} + } + for key, item in diagnoseDict.items(): + diagnose.addItem( + disk, key, item['reportInterval'], item['triggerInterval']) + # used for reporting exception to database + exceptionStat.setdefault( + disk, + {'IO-Delay':{'cur':0,'max':0}, 'IO-Burst':{'cur':0,'max':0}, + 'IO-Hang':{'cur':0,'max':0}}) + + + def _removeDiskMonitor(self, disk): + del self.fieldDicts[disk] + del self.dataStat[disk] + del self.exceptChkDicts[disk] + del self.diagnose[disk] + del self.exceptionStat[disk] + + + def _disableThreshComp(self, devname, qusize): + exceptChkDicts = self.exceptChkDicts + exceptChkDicts[devname].disableThreshComp('util') + exceptChkDicts[devname].disableThreshComp('iops') + exceptChkDicts[devname].disableThreshComp('bps') + if qusize > 1: + exceptChkDicts[devname].disableThreshComp('await') + + + def _calcIowait(self): + with open("/proc/stat") as fStat: + statList = map(long, fStat.readline().split()[1:]) + iowait = float(format( + (statList[4] - self.cpuStatIowait['iowait']) * 100.0 / + (sum(statList) - self.cpuStatIowait['sum']), '.2f')) + return iowait + + + def _calcIoIndex(self, devname, field, secs): + ds = self.dataStat[devname] + uploadInter = self.uploadInter + + rws = field['1'][1] + field['5'][1] - field['1'][0] - field['5'][0] + iops = round(rws / secs, 1) + ds['iops'] = (ds['iops'] * (uploadInter - 1) + iops) / uploadInter + + rwSecs = field['3'][1] + field['7'][1] - field['3'][0] - field['7'][0] + bps = rwSecs / secs * 512 + ds['bps'] = (ds['bps'] * (uploadInter - 1) + bps) / uploadInter + + qusize = round((field['11'][1] - field['11'][0]) / secs / 1000, 2) + ds['qusize'] = (ds['qusize'] * (uploadInter - 1) + qusize) / uploadInter + + rwTiks = field['4'][1] + field['8'][1] - field['4'][0] - field['8'][0] + wait = round(rwTiks / (iops * secs), 2) if iops else 0 + ds['await'] = (ds['await'] * (uploadInter - 1) + wait) / uploadInter + + times = field['10'][1] - field['10'][0] + util = round(times * 100.0 / (secs * 1000), 2) + util = util if util <= 100 else 100.0 + ds['util'] = (ds['util'] * (uploadInter - 1) + util) / uploadInter + + return {'iops': iops*secs, 'bps': bps*secs, 'qusize': qusize*secs, + 'wait': wait, 'util': util} + + + def _checkDiagSwitchChange(self, t): + s = self.diagSwitch[t] + newSW = self.cfg.getCfgItem(t) + if newSW != s['sw'] and newSW == 'on': + for k,v in self.exceptionStat.items(): + if 
s['esi'] in v.keys(): + v[s['esi']]['max'] = 0 + s['sw'] = newSW + return newSW + + + def _checkIOwaitException(self, iowait): + exceptChk = self.exceptChkDicts['system'] + dataStat = self.dataStat['system'] + es = self.exceptionStat['system']['IOwait-High'] + diagnose = self.diagnose + uploadInter = self.uploadInter + + if iowait >= self.cfg.getCfgItem('iowait'): + exceptChk.disableThreshComp('iowait') + + dataStat['iowait'] = \ + (dataStat['iowait'] * (uploadInter - 1) + iowait) / uploadInter + + # Detect iowait exception + minThresh = self.cfg.getCfgItem('iowait') + iowaitThresh = max(exceptChk.getDynThresh('iowait'), minThresh) + if iowait >= iowaitThresh: + es['cur'] += 1 + diagSW = self._checkDiagSwitchChange('diagIowait') + rDiagValid = diagnose.recentDiagnoseValid('iowait') + # Configure iowait diagnosis + if diagSW == 'on' and (es['cur'] > es['max'] or not rDiagValid): + nrSample = exceptChk.getNrDataSample('iowait') + iowaitArg = max(int(iowait * 0.25), minThresh) + diagnose.setUpDiagnose('system', 'iowait', nrSample, iowaitArg) + + exceptChk.updateDynThresh('iowait', iowait) + + def _checkIoburstException(self, devname, es, bps, iops, exceptChk): + bpsLowW = self.cfg.getCfgItem('bps') + bpsHighW = max(exceptChk.getDynThresh('bps'), bpsLowW) + bpsMiddW = max(bpsLowW, bpsHighW / 2) + iopsLowW = self.cfg.getCfgItem('iops') + iopsHighW = max(exceptChk.getDynThresh('iops'), iopsLowW) + iopsMiddW = max(iopsLowW, iopsHighW / 2) + ioburst = exception = False + + if iops >= iopsMiddW or bps >= bpsMiddW: + ioburst = True + bpsOver = True if bps >= bpsHighW else False + iopsOver = True if iops >= iopsHighW else False + + if iopsOver or bpsOver: + es['cur'] += 1 + diagSW = self._checkDiagSwitchChange('diagIoburst') + rDiagValid = self.diagnose.recentDiagnoseValid('ioutil') + # Configure IO load diagnosis + if diagSW == 'on' and (es['cur'] > es['max'] or not rDiagValid): + bpsArg = iopsArg = 0 + if bpsOver == True: + bpsArg = max(int(bps * 0.25), bpsLowW) + if iopsOver == True: + iopsArg = max(int(iops * 0.7), iopsLowW) + nrSample = exceptChk.getNrDataSample('util') + self.diagnose.setUpDiagnose( + devname, 'ioutil', nrSample, bpsArg, iopsArg) + return ioburst + + def _checkIohangException( + self, devname, es, util, qusize, iops, exceptChk): + # Detect IO hang + if util >= 100 and qusize >= 1 and iops < 50: + # Configure IO hang diagnosis + if self.diagnose.isException(devname, 'iohang') == True: + es['cur'] += 1 + diagSW = self._checkDiagSwitchChange('diagIohang') + rDiagValid = self.diagnose.recentDiagnoseValid('iohang') + if diagSW == 'on' and (es['cur'] > es['max'] or not rDiagValid): + nrSample = exceptChk.getNrDataSample('util') + self.diagnose.setUpDiagnose(devname, 'iohang', nrSample) + + def _checkUtilException(self, devname, util, iops, bps, qusize): + exceptChk = self.exceptChkDicts[devname] + exceptionStat = self.exceptionStat[devname] + diagnose = self.diagnose + ioburst = False + + utilMinThresh = self.cfg.getCfgItem('util') + utilThresh = max(exceptChk.getDynThresh('util'), utilMinThresh) + if util >= utilThresh: + es = exceptionStat['IO-Burst'] + ioburst = \ + self._checkIoburstException(devname, es, bps, iops, exceptChk) + if not ioburst: + es = exceptionStat['IO-Hang'] + self._checkIohangException( + devname, es, util, qusize, iops, exceptChk) + exceptChk.updateDynThresh('util', util) + exceptChk.updateDynThresh('iops', iops) + exceptChk.updateDynThresh('bps', bps) + return ioburst + + + def _checkAwaitException(self, devname, wait, ioburst): + exceptChk = 
self.exceptChkDicts[devname] + es = self.exceptionStat[devname]['IO-Delay'] + diagnose = self.diagnose + + awaitMinThresh = self.cfg.getCfgItem('await') + awaitThresh = max(exceptChk.getDynThresh('await'), awaitMinThresh) + if wait >= awaitThresh: + es['cur'] += 1 + diagSW = self._checkDiagSwitchChange('diagIolat') + rDiagValid = diagnose.recentDiagnoseValid('iolatency') + # Configuring IO delay diagnostics + if diagSW == 'on' and (es['cur'] > es['max'] or not rDiagValid): + nrSample = exceptChk.getNrDataSample('await') + waitArg = max(int(wait * 0.4), awaitMinThresh) + diagnose.setUpDiagnose( + devname, 'iolatency', nrSample, waitArg, ioburst) + exceptChk.updateDynThresh('await', wait) + + + def _reportDataToRemote(self, devList): + # report datastat&&exception to database + nCycle = 1000.0 / float(self.cfg.getCfgItem('cycle')) + dataStat = self.dataStat['system'] + es = self.exceptionStat['system']['IOwait-High'] + + putIdx = ',idx_type=system_Indicator,devname=system ' + putField = 'iowait=%f,iowaithighCnt=%f' %( + dataStat['iowait'], es['cur'] / nCycle) + self._sender.puts(self._nfPutTlb4System + putIdx + putField) + + es['max'] = max(es['max'] if es['cur'] else 0, es['cur']) + es['cur'] = 0 + cur = {'IO-Delay':0, 'IO-Burst':0, 'IO-Hang':0} + for devname in devList: + dataStat = self.dataStat[devname] + es = self.exceptionStat[devname] + for type in cur.keys(): + cur[type] = int(es[type]['cur']) / nCycle + es[type]['max'] = \ + max(es[type]['max'] if cur[type] else 0, cur[type]) + es[type]['cur'] = 0 + + putIdx = ',idx_type=iostat_Indicator,devname=%s ' % devname + putField = 'await=%f,util=%f,iops=%f,bps=%f,qusize=%f' %( + dataStat['await'], dataStat['util'], dataStat['iops'], + dataStat['bps'] / 1024.0, dataStat['qusize'] + ) + putField += ',iodelayCnt=%f,ioburstCnt=%f,iohangCnt=%f' %( + cur['IO-Delay'], cur['IO-Burst'], cur['IO-Hang']) + self._sender.puts(self._nfPutTlb + putIdx + putField) + + + def _collectBegin(self): + fieldDicts = self.fieldDicts + + # collect iowait begin + with open("/proc/stat") as fStat: + cpuStatList = map(long, fStat.readline().split()[1:]) + self.cpuStatIowait['sum'] = sum(cpuStatList) + self.cpuStatIowait['iowait'] = cpuStatList[4] + + # collect iostat begin + self.fDiskStats.seek(0) + for stat in self.fDiskStats.readlines(): + stat = stat.split() + if os.path.exists('/sys/block/'+stat[2]) == False: + if stat[2] in fieldDicts.keys(): + self._removeDiskMonitor(stat[2]) + continue + + if stat[2] in fieldDicts.keys(): + field = fieldDicts[stat[2]] + else: + field = { + '1': [0, 0], '3': [0, 0], '4': [0, 0], + '5': [0, 0], '7': [0, 0], '8': [0, 0], + '10': [0, 0], '11': [0, 0]} + # add data staticsis for per-disk + fieldDicts.setdefault(stat[2], field) + self._addMonitorAttrForDisk(stat[2]) + + for idx, value in field.items(): + value[0] = long(stat[int(idx) + 2]) + + + def _collectEnd(self, secs): + fieldDicts = self.fieldDicts + exceptChkDicts = self.exceptChkDicts + uploadInter = self.uploadInter + + self.uploadInter = \ + 1 if ((uploadInter * secs) % 60) == 0 else (uploadInter + 1) + + # Calculate iowait + iowait = self._calcIowait() + # Detect iowait exception + self._checkIOwaitException(iowait) + + # collect iostat end + self.fDiskStats.seek(0) + for stat in self.fDiskStats.readlines(): + stat = stat.split() + if os.path.exists('/sys/block/'+stat[2]) == False: + if stat[2] in fieldDicts.keys(): + self._removeDiskMonitor(stat[2]) + continue + try: + for idx, value in fieldDicts[stat[2]].items(): + value[1] = long(stat[int(idx) + 2]) + except 
Exception: + continue + + for devname, field in fieldDicts.items(): + io = self._calcIoIndex(devname, field, secs) + if io['util'] >= self.cfg.getCfgItem('util'): + # There is IO Burst at present, turn off threshold compensation + self._disableThreshComp(devname, io['qusize']) + # Detect util exception + ioburst = self._checkUtilException( + devname, io['util'], io['iops'], io['bps'], io['qusize']) + # Detect await exception + self._checkAwaitException(devname, io['wait'], ioburst) + + if ((self.uploadInter * secs) % 60) == 0: + self._reportDataToRemote(fieldDicts.keys()) + + + def monitor(self): + while True: + secs = self.cfg.getCfgItem('cycle') / 1000.0 + self._collectBegin() + time.sleep(secs) + self._collectEnd(secs) + # Check if it is necessary to start the diagnosis + self.diagnose.checkDiagnose() + self.fDiskStats.close() + diff --git a/source/tools/monitor/ioMonitor/ioMon/ioMonitorMain.py b/source/tools/monitor/ioMonitor/ioMon/ioMonitorMain.py new file mode 100755 index 0000000000000000000000000000000000000000..921a343550f23126ec91c5ea91147adbb4ac6ce9 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/ioMonitorMain.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +import argparse +import signal +from ioMonCfgClass import ioMonCfgClass +from ioMonitorClass import ioMonitorClass + +setcfg_descripton = """set monitor cfg, like -s \'xxxx=xxx,xxx=xxx\' +iowait, The min cpu-iowait not report exceptions(1~100, default 5). +await, The min partition-await not report exceptions(N ms, default 10). +util, The min partition-util not report exceptions(1~100, default 20). +bps, The min partition-bps not report exceptions(bytes, default 30MB). +iops, The min partition-iops not report exceptions(default 150). +cycle, The cycle of Monitoring data collection(default 500ms). +diagIowait, Disable or enable diagnose while reporting iowait exceptions(default on). +diagIolat, Disable or enable diagnose while reporting latency exceptions(default on). +diagIoburst, Disable or enable diagnose while reporting ioburst exceptions(default on). +diagIohang, Disable or enable diagnose while reporting iohang exceptions(default on). +""" + +def getRunArgsFromYaml(yamlF): + logRootPath = '' + pipeFile = '' + with open(yamlF) as f: + lines = f.readlines() + for l in lines: + if not l.startswith('#'): + if 'proc_path:' in l: + logRootPath = l.split()[1].strip('\n') + elif 'outline:' in l: + pipeFile = lines[lines.index(l) + 1].split()[1].strip('\n') + if logRootPath and pipeFile: + break + if not logRootPath or not pipeFile: + raise ValueError( + 'Unable to get labels \"proc_path\" and \"outline\" in %s' % yamlF) + return logRootPath+'/run',pipeFile + + +def main(): + examples = """e.g. + ./ioMonitorMain.py -y [yaml_file] + Start ioMonitor + ./ioMonitorMain.py -y [yaml_file] --reset_cfg --only_set_cfg + Only reset cfg to default + ./ioMonitorMain.py -y [yaml_file] -s 'iowait=10,iops=200,diagIowait=on' --only_set_cfg + Only set min-iowait&&min-iops and disable iowait diagnose to config. 
+ """ + parser = argparse.ArgumentParser( + description="start ioMonitor.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) + parser.add_argument('-y','--yaml_file', + help='Specify the socket pipe for data upload'\ + ' and exception log path') + parser.add_argument('-s','--set_cfg', help=setcfg_descripton) + parser.add_argument('-r','--reset_cfg', action='store_true', + help='Reset cfg to default') + parser.add_argument('-o','--only_set_cfg', action='store_true', + help='Only set cfg') + args = parser.parse_args() + + signal.signal(signal.SIGCHLD, signal.SIG_IGN) + logRootPath,pipeFile = getRunArgsFromYaml(args.yaml_file) + if args.only_set_cfg: + if not os.path.exists(logRootPath+'/ioMon/ioMonCfg.json'): + print("%s" % ("config fail, not found ioMonCfg.json")) + return + if args.set_cfg is None and not args.reset_cfg: + print("%s" % ("--set_cfg or --reset_cfg not found.")) + return + ioMonCfg = ioMonCfgClass(args.set_cfg, args.reset_cfg, logRootPath) + ioMonCfg.notifyIoMon() + return + + ioMonCfg = ioMonCfgClass(args.set_cfg, args.reset_cfg, logRootPath) + ioMon = ioMonitorClass(logRootPath, ioMonCfg, pipeFile) + ioMon.monitor() + +if __name__ == "__main__": + main() diff --git a/source/tools/monitor/ioMonitor/ioMon/nfPut.py b/source/tools/monitor/ioMonitor/ioMon/nfPut.py new file mode 100755 index 0000000000000000000000000000000000000000..dbede66c852dc095a3ee90af8dd8a67673bdd39d --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/nfPut.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +""" +------------------------------------------------- + File Name: nfPut + Description : + Author : liaozhaoyan + date: 2022/4/28 +------------------------------------------------- + Change Activity: + 2022/4/28: +------------------------------------------------- +""" +__author__ = 'liaozhaoyan' + +import os +import socket +MAX_BUFF = 128 * 1024 + + +class CnfPut(object): + def __init__(self, pipeFile): + self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) + self._path = pipeFile + if not os.path.exists(self._path): + raise ValueError("pipe path is not exist. 
please check unity is running.") + + + def puts(self, s): + if len(s) > MAX_BUFF: + raise ValueError("message len %d, is too long ,should less than%d" % (len(s), MAX_BUFF)) + return self._sock.sendto(s, self._path) + + +if __name__ == "__main__": + nf = CnfPut("/tmp/sysom") + pass diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/__init__.py b/source/tools/monitor/ioMonitor/ioMon/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cb8e4b62acc8012add0dc0175a04b6b4f51940ec --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +if __name__ == "__main__": + pass diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/__init__.py b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cb8e4b62acc8012add0dc0175a04b6b4f51940ec --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +if __name__ == "__main__": + pass diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/common.py b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/common.py new file mode 100755 index 0000000000000000000000000000000000000000..f01dbf271205c9063832d2f83f2aace435caa5f1 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/common.py @@ -0,0 +1,129 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import sys +import string +import re +from subprocess import PIPE, Popen + + +def execCmd(cmd): + p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) + return p.stdout.read().decode('utf-8') + + +def echoFile(filename, txt): + execCmd("echo \'"+txt+"\' > "+filename) + + +def echoFileAppend(filename, txt): + execCmd("echo \'"+txt+"\' >> "+filename) + + +def humConvert(value, withUnit): + units = ["B", "KB", "MB", "GB", "TB", "PB"] + size = 1024.0 + + if value == 0: + return value + + for i in range(len(units)): + if (value / size) < 1: + if withUnit: + return "%.1f%s/s" % (value, units[i]) + else: + return "%.1f" % (value) + value = value / size + + +def getDevt(devname): + try: + with open('/sys/class/block/' + devname + '/dev') as f: + dev = f.read().split(':') + return ((int(dev[0]) << 20) + int(dev[1])) + except Exception: + return -1 + + +def getDevtRegion(devname): + if os.path.exists('/sys/block/'+devname): + isPart = False + elif os.path.exists('/sys/class/block/'+devname): + isPart = True + else: + return [-1, -1] + + master = devname if not isPart else \ + os.readlink('/sys/class/block/'+devname).split('/')[-2] + partList = list( + filter(lambda x: master in x, + os.listdir('/sys/class/block/'+master))) + if not partList: + partList = [] + partList.append(master) + return [getDevt(p) for p in partList] + + +def getTgid(pid): + try: + with open("/proc/"+str(pid)+"/status") as f: + return ''.join(re.findall(r'Tgid:(.*)', f.read())).lstrip() + except IOError: + return '-' + return '-' + + +def fixComm(comm, pid): + try: + if ".." in comm: + with open("/proc/"+str(pid)+"/comm") as f: + return f.read().rstrip('\n') + except IOError: + return comm + return comm + + +def getContainerId(pid): + try: + piddir = "/proc/"+str(pid) + with open(piddir+"/cgroup") as f: + # ... + # cpuset,cpu,cpuacct:/docker/e2afa607d8f13e5b1f89d38ee86d86.... + # memory:/docker/e2afa607d8f13e5b1f89d38ee86..... + # ... 
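# A sketch of the split chain used below, walked over the sample cgroup lines
# shown above (the id is the truncated sample string, not a real container):
#   data = "cpuset,cpu,cpuacct:/docker/e2afa607d8f13e5b1f89d38ee86d86....\n" \
#          "memory:/docker/e2afa607d8f13e5b1f89d38ee86.....\n"
#   data.split('memory:')[1]   -> "/docker/e2afa607d8f13e5b1f89d38ee86.....\n..."
#   ...split('\n')[0]          -> "/docker/e2afa607d8f13e5b1f89d38ee86....."
#   ...rsplit('/', 1)[1]       -> "e2afa607d8f13e5b1f89d38ee86....."   (the container id)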
+ cid = f.read().split('memory:')[1].split('\n')[0].rsplit('/',1)[1] + if not cid: + cid = '-' + except Exception: + cid = '-' + return cid + + +def getFullNameFromProcPid(pid, ino): + try: + piddir = "/proc/"+str(pid) + # list the open files of the task + fdList = os.listdir(piddir+"/fd") + for f in fdList: + try: + path = os.readlink(piddir+"/fd/"+f) + if '/dev/' in path or '/proc/' in path or '/sys/' in path: + continue + + if os.path.isfile(path) and os.stat(path).st_ino == int(ino): + return path + except (IOError, EOFError) as e: + continue + except Exception: + pass + return "-" + + +def supportKprobe(name): + file = '/sys/kernel/debug/tracing/available_filter_functions' + with open(file) as f: + ss = f.read() + if ss.find(name) > 0: + return True + return False diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/diskstatClass.py b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/diskstatClass.py new file mode 100755 index 0000000000000000000000000000000000000000..c9755bfed0b770fc029d7a55a1e8a8153e0b2dac --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/diskstatClass.py @@ -0,0 +1,219 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import time +import json +import string +from collections import OrderedDict +from common import humConvert + + +class diskstatClass(object): + def __init__(self, devname, utilThresh, json, nodiskStat, Pattern): + self.devname = devname + self.json = json + self.cycle = 1 + self.started = False + self.Pattern = Pattern + self.nodiskStat = nodiskStat + self.utilThresh = int(utilThresh) if utilThresh is not None else 0 + self.fieldDicts = OrderedDict() + self.diskInfoDicts = {} + self.deviceStatDicts = {} + self.f = open("/proc/diskstats") + if json: + self.fJson = open(json, 'w+') + + + def getDevNameByDevt(self, devt): + try: + return self.diskInfoDicts[str(devt)]['partName'] + except Exception: + return '-' + + def getMasterDev(self, devt): + try: + return self.diskInfoDicts[str(devt)]['master'] + except Exception: + return '-' + + def getDiskStatInd(self, disk, key): + return self.deviceStatDicts[disk][key] + + def start(self): + fieldDicts = self.fieldDicts + diskInfoDicts = self.diskInfoDicts + deviceStatDicts = self.deviceStatDicts + + if self.started: + return + self.started = True + self.cycle = time.time() + self.f.seek(0) + for stat in self.f.readlines(): + stat = stat.split() + if self.devname is not None and self.devname not in stat[2] and \ + stat[2] not in self.devname: + continue + + field = {\ + '1':[0,0], '2':[0,0], '3':[0,0], '4':[0,0],\ + '5':[0,0], '6':[0,0], '7':[0,0], '8':[0,0],\ + '10':[0,0], '11':[0,0]} + for idx,value in field.items(): + value[0] = long(stat[int(idx)+2]) + if stat[2] not in fieldDicts.keys(): + fieldDicts.setdefault(stat[2], field) + path = os.readlink('/sys/class/block/'+stat[2]).split('/') + master = path[-2] + if master not in path[-1]: + master = path[-1] + diskInfoDicts.setdefault( + str((int(stat[0])<<20)+int(stat[1])), + {'partName': stat[2], 'master': master}) + deviceStat = { + 'r_rqm':0, 'w_rqm':0, 'r_iops':0, 'w_iops':0, 'r_bps':0, + 'w_bps':0, 'wait':0, 'r_wait':0, 'w_wait':0, 'util%':-1} + deviceStatDicts.setdefault(stat[2], deviceStat) + else: + deviceStatDicts[stat[2]]['util%'] = -1 + fieldDicts[stat[2]].update(field) + + def stop(self): + fieldDicts = self.fieldDicts + deviceStatDicts = self.deviceStatDicts + self.cycle = max(int(time.time()-self.cycle), 1) + + if not self.started: + return + self.started = False + + self.f.seek(0) + for stat in 
self.f.readlines(): + stat = stat.split() + if self.devname is not None and self.devname not in stat[2] and \ + stat[2] not in self.devname: + continue + for idx,value in fieldDicts[stat[2]].items(): + value[1] = long(stat[int(idx)+2]) + + for devname,field in fieldDicts.items(): + if self.devname is not None and devname not in self.devname and \ + self.devname not in devname: + continue + util = round((field['10'][1]-field['10'][0])*100.0/(self.cycle*1000),2) + util = util if util <= 100 else 100.0 + if util < self.utilThresh: + continue + deviceStatDicts[devname]['util%'] = util + r_iops = field['1'][1]-field['1'][0] + deviceStatDicts[devname]['r_iops'] = r_iops + r_rqm = field['2'][1]-field['2'][0] + deviceStatDicts[devname]['r_rqm'] = r_rqm + w_iops = field['5'][1]-field['5'][0] + deviceStatDicts[devname]['w_iops'] = w_iops + w_rqm = field['6'][1]-field['6'][0] + deviceStatDicts[devname]['w_rqm'] = w_rqm + r_bps = (field['3'][1]-field['3'][0]) * 512 + deviceStatDicts[devname]['r_bps'] = r_bps + w_bps = (field['7'][1]-field['7'][0]) * 512 + deviceStatDicts[devname]['w_bps'] = w_bps + r_ticks = field['4'][1]-field['4'][0] + w_ticks = field['8'][1]-field['8'][0] + wait = round((r_ticks+w_ticks)/(r_iops+w_iops), 2) if (r_iops+w_iops) else 0 + deviceStatDicts[devname]['wait'] = wait + r_wait = round(r_ticks / r_iops, 2) if r_iops else 0 + deviceStatDicts[devname]['r_wait'] = r_wait + w_wait = round(w_ticks / w_iops, 2) if w_iops else 0 + deviceStatDicts[devname]['w_wait'] = w_wait + + + def __showJson(self): + deviceStatDicts = self.deviceStatDicts + + statJsonStr = '{\ + "time":"",\ + "diskstats":[]}' + dstatDicts = json.loads(statJsonStr, object_pairs_hook=OrderedDict) + dstatDicts['time'] = time.strftime('%Y/%m/%d %H:%M:%S', time.localtime()) + for devname,stat in deviceStatDicts.items(): + if stat['util%'] < 0: + continue + dstatJsonStr = '{\ + "diskname":"","r_rqm":0,"w_rqm":0,"r_iops":0,"w_iops":0,\ + "r_bps":0,"w_bps":0,"wait":0,"r_wait":0,"w_wait":0,"util%":0}' + dstatDict = json.loads(dstatJsonStr, object_pairs_hook=OrderedDict) + dstatDict["diskname"] = devname + for key,val in stat.items(): + dstatDict[key] = val + dstatDicts["diskstats"].append(dstatDict) + if len(dstatDicts["diskstats"]) > 0: + data = json.dumps(dstatDicts) + self.writeDataToJson(data) + return + + def show(self): + secs = self.cycle + deviceStatDicts = self.deviceStatDicts + if self.nodiskStat: + return + + if self.enableJsonShow() == True: + self.__showJson() + return + + if self.Pattern: + WrTotalIops = 0 + RdTotalIops = 0 + WrTotalBw = 0 + RdTotalBw = 0 + print('%-20s%-8s%-8s%-8s%-8s%-12s%-12s%-8s%-8s%-8s%-8s' %\ + (("device-stat:"),"r_rqm","w_rqm","r_iops","w_iops","r_bps",\ + "w_bps","wait","r_wait","w_wait","util%")) + stSecs = str(secs)+'s' if secs > 1 else 's' + for devname,stat in deviceStatDicts.items(): + if (not self.devname and not os.path.exists('/sys/block/'+devname)) or \ + stat['util%'] < 0: + continue + if self.Pattern: + WrTotalIops += stat['w_iops'] + RdTotalIops += stat['r_iops'] + WrTotalBw += stat['w_bps'] + RdTotalBw += stat['r_bps'] + stWbps = humConvert(stat['w_bps'], True).replace('s', stSecs) if stat['w_bps'] else 0 + stRbps = humConvert(stat['r_bps'], True).replace('s', stSecs) if stat['r_bps'] else 0 + print('%-20s%-8s%-8s%-8s%-8s%-12s%-12s%-8s%-8s%-8s%-8s' %\ + (devname, str(stat['r_rqm']), str(stat['w_rqm']), str(stat['r_iops']), + str(stat['w_iops']), stRbps, stWbps, str(stat['wait']), str(stat['r_wait']), + str(stat['w_wait']), str(stat['util%']))) + if self.Pattern: + 
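# In Pattern mode the per-device read/write iops and byte counts accumulated in
# the loop above are folded into a single summary line covering the whole
# sample window, printed below after the per-device rows.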
print('totalIops:%d(r:%d, w:%d), totalBw:%s(r:%s, w:%s)' % + ((WrTotalIops+RdTotalIops), RdTotalIops, WrTotalIops, + (humConvert((WrTotalBw+RdTotalBw), True).replace('s', stSecs) if (WrTotalBw+RdTotalBw > 0) else 0), + (humConvert(RdTotalBw, True).replace('s', stSecs) if RdTotalBw else 0), + (humConvert(WrTotalBw, True).replace('s', stSecs) if WrTotalBw else 0))) + print("") + + def clear(self): + self.f.close() + if self.enableJsonShow(): + self.fJson.close() + + def notCareDevice(self, devname): + if not self.nodiskStat and self.deviceStatDicts[devname]['util%'] < 0: + return True + return False + + def disableShow(self): + deviceStatDicts = self.deviceStatDicts + for devname,stat in deviceStatDicts.items(): + if self.deviceStatDicts[devname]['util%'] >= 0: + return False + return True + + def enableJsonShow(self): + return True if self.json else False + + def writeDataToJson(self, data): + self.fJson.write(data+'\n') diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/fsstatClass.py b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/fsstatClass.py new file mode 100755 index 0000000000000000000000000000000000000000..763de3024c7032bb148aa6238e99b77fc49814f9 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/fsstatClass.py @@ -0,0 +1,418 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import sys +import signal +import string +import time +import re +import json +from collections import OrderedDict +from diskstatClass import diskstatClass +from common import getDevt,getDevtRegion +from common import humConvert,supportKprobe +from common import execCmd,echoFile,echoFileAppend +from common import getTgid,fixComm,getContainerId,getFullNameFromProcPid +from mmap import PAGESIZE + + +def getMntPath(fileInfoDict): + mntfname = fileInfoDict['mntfname'] + fsmountInfo = fileInfoDict['fsmountinfo'] + + if len(fsmountInfo) <= 0: + return '-' + + if mntfname.isspace() or len(mntfname) == 0: + return fsmountInfo[0].split()[1] + try: + for l in fsmountInfo: + if l.find(mntfname) > -1: + return l.split()[1] + return '-' + except IndexError: + return fsmountInfo[0].split()[1] + + +def getFullName(fileInfoDict): + fileSuffix = '' + + mntdir = getMntPath(fileInfoDict) + if mntdir == '/': + mntdir = '' + + for f in [ + fileInfoDict['d3fname'], fileInfoDict['d2fname'], + fileInfoDict['d1fname'], fileInfoDict['bfname']]: + if f != '/' and f != '(fault)': + fileSuffix += ('/' + f) + if fileInfoDict['d3fname'] != '/' and fileInfoDict['d3fname'] != '(fault)': + fileSuffix = '/...' + fileSuffix + filename = mntdir + fileSuffix + + if '...' 
in filename: + f = getFullNameFromProcPid( + fileInfoDict['pid'], fileInfoDict['ino']) + if f != '-': + filename = f + return filename + + +class fsstatClass(diskstatClass): + def __init__( + self, devname, pid, utilThresh, bwThresh, top, + json, nodiskStat, miscStat, Pattern): + super(fsstatClass, self).__init__( + devname, utilThresh, json, nodiskStat, Pattern) + self.expression = [] + self.pid = pid + self.miscStat = miscStat + self.devname = devname + self.top = int(top) if top is not None else 99999999 + self.bwThresh = int(bwThresh) if bwThresh is not None else 0 + self.devt = getDevtRegion(devname) if devname is not None else [-1, -1] + tracingBaseDir = "/sys/kernel/debug/tracing" + self.kprobeEvent = tracingBaseDir+"/kprobe_events" + self.tracingDir = tracingBaseDir+'/instances/iofsstat4fs' + self.kprobeDir = self.tracingDir+"/events/kprobes" + self.kprobe = [] + self.kprobeArgsFormat = 'dev=+0x10(+0x28(+0x20(%s))):u32 '\ + 'inode_num=+0x40(+0x20(%s)):u64 len=%s:u64 '\ + 'mntfname=+0x0(+0x28(+0x0(+0x10(%s)))):string '\ + 'bfname=+0x0(+0x28(+0x18(%s))):string '\ + 'd1fname=+0x0(+0x28(+0x18(+0x18(%s)))):string '\ + 'd2fname=+0x0(+0x28(+0x18(+0x18(+0x18(%s))))):string '\ + 'd3fname=+0x0(+0x28(+0x18(+0x18(+0x18(+0x18(%s)))))):string %s' + + kprobeArgs = self._getKprobeArgs('None') + self.ftracePaserCommArgs = ' comm=(.*)' if 'comm=' in kprobeArgs else '' + mmapKprobeArgs = self._getKprobeArgs('mmap') + self.fsmountInfo = self._getFsMountInfo() + for entry in self.fsmountInfo: + fstype = entry.split()[2] + self._kprobeReadWrite(fstype, kprobeArgs) + self._kprobeMmap(fstype, mmapKprobeArgs) + if len(self.kprobe) <= 0: + print("%s" % ("error: not available kprobe")) + sys.exit(-1) + self.outlogFormatBase = 10 + + def _kprobeReadWrite(self, fstype, kprobeArgs): + for op in ['write', 'read']: + kPoints = [ + fstype+"_file_"+op+"_iter", fstype+"_file_"+op, + fstype+"_file_aio_"+op, "generic_file_aio_"+op] + if list(set(self.kprobe) & set(kPoints)): + continue + kprobe = None + for k in kPoints: + if supportKprobe(k): + kprobe = k + break + if not kprobe: + if self.enableJsonShow() == False: + print("warnning: not available %s kprobe" % op) + continue + pointKprobe = 'p '+kprobe+' '+kprobeArgs + self.kprobe.append(kprobe) + self.expression.append(pointKprobe) + + def _kprobeMmap(self, fstype, kprobeArgs): + for kprobe in [fstype+"_page_mkwrite", 'filemap_fault']: + if kprobe in self.kprobe: + continue + if not supportKprobe(kprobe): + if self.enableJsonShow() == False: + print("not support kprobe %s" % kprobe) + continue + pointKprobe = 'p '+kprobe+' '+kprobeArgs + self.kprobe.append(kprobe) + self.expression.append(pointKprobe) + + def _getKprobeArgs(self, type): + commArgs = '' + vinfo = execCmd('uname -r') + version = vinfo.split('.') + + if type == 'mmap': + offFile = '0xa0(+0x0%s)' if int(version[0]) > 4 or ( + int(version[0]) == 4 and int(version[1]) > 10) else '0xa0%s' + offLen = '0x0(+0x0%s)' if int(version[0]) > 4 or ( + int(version[0]) == 4 and int(version[1]) > 10) else '0x0%s' + else: + offLen = '0x10' + offFile = '0x0' if int(version[0]) > 3 or ( + int(version[0]) == 3 and int(version[1]) > 10) else '0x8' + if int(version[0]) <= 3: + offLen = '0x8' if int(version[1]) < 13 else '0x18' + + if int(version[0]) > 3: + commArgs = 'comm=$comm' + + re= execCmd('lscpu | grep -E \"Architecture|架构\" | sed \"s/:/:/g\"') + arch = re.split(':')[1].strip() + regs = { + "arm":['(%r0)','(%r1)'], + "x86":['(%di)', '(%si)'], + "aarch64":['(%x0)','(%x1)']} + argv0 = argv1 = '' + for key,val in 
regs.items(): + if arch.startswith(key): + if type == 'mmap': + argv0 = '+' + (offFile % val[0]) + argv1 = '+' + (offLen % val[0]) + argv2 = argv1 + else: + argv2 = argv0 = '+' + offFile + val[0] + argv1 = '+' + offLen + val[1] + break + if argv0 == '': + raise ValueError('arch %s not support' % arch) + + kprobeArgs = self.kprobeArgsFormat % ( + argv0, argv0, argv1, argv0, argv0, argv0, argv0, argv2, commArgs) + return kprobeArgs + + def _getFsMountInfo(self): + devList = [] + if self.devname is not None: + devList.append('/dev/'+self.devname) + else: + sysfsBlockDirList = os.listdir("/sys/block") + for dev in sysfsBlockDirList: + devList.append('/dev/'+dev) + with open("/proc/mounts") as f: + fsmountInfo = list(filter(lambda x: any( + e in x for e in devList), f.readlines())) + return fsmountInfo + + def config(self): + devt = self.devt + + if not os.path.exists(self.tracingDir): + os.mkdir(self.tracingDir) + for exp in self.expression: + probe = 'p_'+exp.split()[1]+'_0' + enableKprobe = self.kprobeDir+"/"+probe+"/enable" + filterKprobe = self.kprobeDir+"/"+probe+"/filter" + if os.path.exists(enableKprobe): + echoFile(enableKprobe, "0") + if devt[0] > 0: + echoFile(filterKprobe, "0") + echoFileAppend(self.kprobeEvent, '-:%s' % probe) + + echoFileAppend(self.kprobeEvent, exp) + if devt[0] > 0: + dev = getDevt(self.devname) + if dev == min(devt): + echoFile(filterKprobe, + "dev>="+str(min(devt))+"&&dev<="+str(max(devt))) + else: + echoFile(filterKprobe, "dev=="+str(dev)) + echoFile(enableKprobe, "1") + fmt = execCmd("grep print "+self.kprobeDir+"/"+probe+"/format") + matchObj = re.match(r'(.*) dev=(.*) inode_num=(.*)', fmt) + if 'x' in matchObj.group(2): + self.outlogFormatBase = 16 + + def start(self): + echoFile(self.tracingDir+"/trace", "") + echoFile(self.tracingDir+"/tracing_on", "1") + super(fsstatClass, self).start() + + def stop(self): + echoFile(self.tracingDir+"/tracing_on", "0") + super(fsstatClass, self).stop() + + def clear(self): + for exp in self.expression: + probe = 'p_'+exp.split()[1]+'_0' + enableKprobe = self.kprobeDir+"/"+probe+"/enable" + if not os.path.exists(enableKprobe): + continue + echoFile(enableKprobe, "0") + if self.devt[0] > 0: + filterKprobe = self.kprobeDir+"/"+probe+"/filter" + echoFile(filterKprobe, "0") + echoFileAppend(self.kprobeEvent, '-:%s' % probe) + super(fsstatClass, self).clear() + + def _paserTraceToStat(self, traceText): + bwTotal = 0 + stat = {} + mStat = {} + fileInfoDict = { + 'device': 0, 'mntfname': '', 'bfname': '', 'd1fname': '', + 'd2fname': '', 'd3fname': '', 'fsmountinfo': '', 'ino': 0, + 'pid': 0} + commArgs = self.ftracePaserCommArgs + hasCommArgs = True if len(commArgs) else False + + # pool-1-thread-2-5029 [002] .... 5293018.252338: p_ext4_file_write_iter_0:\ + # (ext4_file_write_iter+0x0/0x6d0 [ext4]) dev=265289729 inode_num=530392 len=38 + # ... 
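# The dev= field in the sample line above uses the same (major << 20) + minor
# encoding that getDevt() produces from /sys/class/block/<dev>/dev; a quick
# decoding check (plain arithmetic, not part of the parser):
#   dev = 265289729
#   major, minor = dev >> 20, dev & ((1 << 20) - 1)   # -> (253, 1)
# Some kernels print the field in hex, which is why the value is parsed with
# int(..., self.outlogFormatBase) as configured in config().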
+ for entry in traceText: + if ('dev=' not in entry) or ('.so' in entry and 'lib' in entry) or ( + '=\"etc\"' in entry) or ('=\"usr\"' in entry and ( + '=\"bin\"' in entry or '=\"sbin\"' in entry)): + continue + + matchObj = re.match( + r'(.*) \[([^\[\]]*)\] (.*) dev=(.*) inode_num=(.*) len=(.*)'+ + ' mntfname=(.*) bfname=(.*) d1fname=(.*) d2fname=(.*)'+ + ' d3fname=(.*)'+commArgs, entry) + if matchObj is None: + continue + + pid = (matchObj.group(1).rsplit('-', 1))[1].strip() + dev = int(matchObj.group(4), self.outlogFormatBase) + if (self.pid is not None and int(pid) != self.pid) or \ + str(dev) == '0': + continue + + if hasCommArgs: + comm = matchObj.group(12).strip("\"") + else: + comm = (matchObj.group(1).rsplit('-', 1))[0].strip() + comm = fixComm(comm, pid) + if '..' in comm: + continue + + device = self.getDevNameByDevt(dev) + if device == '-': + continue + if self.miscStat is not None: + disk = self.getMasterDev(dev) + if not mStat.has_key(disk): + mStat.setdefault(disk, {}) + stat = mStat[disk] + + ino = int(matchObj.group(5), self.outlogFormatBase) + inoTask = str(ino)+':'+str(comm)+':'+device + if not stat.has_key(inoTask): + fsmountinfo = [f for f in self.fsmountInfo if ('/dev/'+device) in f] + fileInfoDict['device'] = device + fileInfoDict['mntfname'] = matchObj.group(7).strip("\"") + fileInfoDict['bfname'] = matchObj.group(8).strip("\"") + fileInfoDict['d1fname'] = matchObj.group(9).strip("\"") + fileInfoDict['d2fname'] = matchObj.group(10).strip("\"") + fileInfoDict['d3fname'] = matchObj.group(11).strip("\"") + fileInfoDict['fsmountinfo'] = fsmountinfo + fileInfoDict['ino'] = ino + fileInfoDict['pid'] = pid + stat.setdefault(inoTask, + {"inode":str(ino), "comm": comm, "tgid": getTgid(pid), "pid": pid, + "cnt_wr": 0, "bw_wr": 0, "cnt_rd": 0, "bw_rd": 0, "device": device, + "cid":getContainerId(pid), "file": getFullName(fileInfoDict)}) + + size = int(matchObj.group(6), self.outlogFormatBase) + if 'filemap_fault' in entry or 'page_mkwrite' in entry: + size = PAGESIZE + if 'write' in entry or 'page_mkwrite' in entry: + stat[inoTask]["cnt_wr"] += 1 + stat[inoTask]["bw_wr"] += int(size) + if 'read' in entry or 'filemap_fault' in entry: + stat[inoTask]["cnt_rd"] += 1 + stat[inoTask]["bw_rd"] += int(size) + if pid != stat[inoTask]["pid"]: + stat[inoTask]["pid"] = pid + stat[inoTask]["tgid"] = getTgid(pid) + if stat[inoTask]["cid"] == '-': + stat[inoTask]["cid"] = getContainerId(pid) + bwTotal += int(size) + return bwTotal,stat,mStat + + def _joinMiscStat(self, mStat): + for d,val in self.miscStat: + if d not in mStat.keys(): + mStat.setdefault(d, {}) + mStat[d].update(dict(val)) + tmpStat = [] + for d,val in mStat.items(): + idxSort = 'bw_wr' + if self.getDiskStatInd(d, 'w_iops') < self.getDiskStatInd(d, 'r_iops'): + idxSort = 'bw_rd' + s = sorted( + val.items(), key=lambda e: (e[1][idxSort]), reverse=True)[:self.top] + tmpStat.append((d, s)) + del self.miscStat[:] + self.miscStat.extend(tmpStat) + return 0 + + def showJson(self, stat): + secs = self.cycle + statJsonStr = '{"time":"","fsstats":[]}' + fstatDicts = json.loads(statJsonStr, object_pairs_hook=OrderedDict) + fstatDicts['time'] = time.strftime( + '%Y/%m/%d %H:%M:%S', time.localtime()) + stSecs = str(secs)+'s' if secs > 1 else 's' + for key, item in stat.items(): + if (item["cnt_wr"] + item["cnt_rd"]) == 0: + continue + item["bw_wr"] = \ + humConvert(item["bw_wr"], True).replace('s', stSecs) if item["bw_wr"] else 0 + item["bw_rd"] = \ + humConvert(item["bw_rd"], True).replace('s', stSecs) if item["bw_rd"] else 0 + 
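# humConvert() labels the byte counts as "<value><unit>/s"; when the sample
# window is longer than one second the '/s' suffix is rewritten to
# '/<cycle>s' (e.g. "3.5MB/s" -> "3.5MB/5s"), so the reported value reads as a
# per-window total rather than a per-second rate.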
fsstatJsonStr = '{\ + "inode":0,"comm":"","tgid":0,"pid":0,"cnt_rd":0,\ + "bw_rd":0,"cnt_wr":0,"bw_wr":0,"device":0,"cid":0,"file":0}' + fsstatDict = json.loads( + fsstatJsonStr, object_pairs_hook=OrderedDict) + for key, val in item.items(): + fsstatDict[key] = val + fstatDicts["fsstats"].append(fsstatDict) + if len(fstatDicts["fsstats"]) > 0: + self.writeDataToJson(json.dumps(fstatDicts)) + + def printStat(self, stat): + secs = self.cycle + print("%-20s%-8s%-8s%-24s%-8s%-12s%-8s%-12s%-12s%-12s%-32s%s" + % ("comm", "tgid", "pid", "cid", "cnt_rd", "bw_rd", "cnt_wr", + "bw_wr", "inode", "device", "filepath")) + stSecs = str(secs)+'s' if secs > 1 else 's' + for key, item in stat: + if (item["cnt_wr"] + item["cnt_rd"]) == 0: + continue + item["bw_wr"] = \ + humConvert(item["bw_wr"], True).replace('s', stSecs) if item["bw_wr"] else 0 + item["bw_rd"] = \ + humConvert(item["bw_rd"], True).replace('s', stSecs) if item["bw_rd"] else 0 + print("%-20s%-8s%-8s%-24s%-8d%-12s%-8d%-12s%-12s%-12s%s" + % (item["comm"], item["tgid"], item["pid"], item["cid"][0:20], + item["cnt_rd"], item["bw_rd"], item["cnt_wr"], item["bw_wr"], + item["inode"], item["device"], item["file"])) + print("") + + def show(self): + secs = self.cycle + with open(self.tracingDir+"/trace") as f: + traceText = f.read().split('\n') + #traceText = f.readlines() + #traceText = \ + # list(filter(lambda x: any(e in x for e in self.kprobe), f.readlines())) + bwTotal,stat,mStat = self._paserTraceToStat(traceText) + + if self.miscStat is not None: + return self._joinMiscStat(mStat) + elif (self.bwThresh and (bwTotal/secs) < self.bwThresh): + return + + stat = sorted(stat.items(), key=lambda e: ( + e[1]["bw_wr"]+e[1]["bw_rd"]), reverse=True)[:self.top] + + if self.enableJsonShow() == False: + print(time.strftime('%Y/%m/%d %H:%M:%S', time.localtime())) + if self.disableShow() == False: + super(fsstatClass, self).show() + + if self.enableJsonShow() == True: + self.showJson(stat) + else: + self.printStat(stat) + + def entry(self, interval): + self.start() + time.sleep(float(interval)) + self.stop() + self.show() diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/iofsstat.py b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/iofsstat.py new file mode 100755 index 0000000000000000000000000000000000000000..1829031599d4ce4f642b484e4bb2c75f16e3e683 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/iofsstat.py @@ -0,0 +1,107 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import sys +import signal +import string +import argparse +import threading +from collections import OrderedDict +from iostatClass import iostatClass +from fsstatClass import fsstatClass +from promiscClass import promiscClass +import time + +global_iofsstat_stop = False +def signal_exit_handler(signum, frame): + global global_iofsstat_stop + global_iofsstat_stop = True + +def exit_handler(): + global global_iofsstat_stop + global_iofsstat_stop = True + +def iofsstatStart(argv): + global global_iofsstat_stop + global_iofsstat_stop = False + if os.geteuid() != 0: + print("%s" % ("This program must be run as root. Aborting.")) + sys.exit(0) + examples = """e.g. 
+ ./iofsstat.py -d vda -c 1 + Report iops and bps of process for vda per 1secs + ./iofsstat.py -d vda1 --fs -c 1 + Report fs IO-BW and file of process for vda1(must be parttion mounted by filesystem) per 1secs + ./iofsstat.py -m -c 5 -t 5 + Report top5 iops&&bps&&file of process with misc mode per 5secs + ./iofsstat.py -d vda -c 1 -b 1048576 -i 350 + Report process that iops over 350 or bps over 1048576 for vda per 1secs + ./iofsstat.py -u 90 + Report disk that io-util over %90 + """ + parser = argparse.ArgumentParser( + description="Report IO statistic for partitions.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) + parser.add_argument('-T','--Timeout', help='Specify the timeout for program exit(secs).') + parser.add_argument('-t','--top', help='Report the TopN with the largest IO resources.') + parser.add_argument('-u','--util_thresh', help='Specify the util-thresh to report.') + parser.add_argument('-b','--bw_thresh', help='Specify the BW-thresh to report.') + parser.add_argument('-i','--iops_thresh', help='Specify the IOPS-thresh to report.') + parser.add_argument('-c','--cycle', help='Specify refresh cycle(secs).') + parser.add_argument('-d','--device', help='Specify the disk name.') + parser.add_argument('-p','--pid', help='Specify the process id.') + parser.add_argument('-j','--json', help='Specify the json-format output.') + parser.add_argument('-f','--fs', action='store_true', + help='Report filesystem statistic for partitions.') + parser.add_argument('-P','--Pattern', action='store_true', + help='Report IO pattern(--fs not support).') + parser.add_argument('-n','--nodiskStat', action='store_true', + help='Not report disk stat.') + parser.add_argument('-m','--misc', action='store_true', + help='Promiscuous mode.') + args = parser.parse_args(argv) if argv else parser.parse_args() + + secs = float(args.cycle) if args.cycle is not None else 0 + devname = args.device + pid = int(args.pid) if args.pid else None + if argv is None: + signal.signal(signal.SIGINT, signal_exit_handler) + signal.signal(signal.SIGHUP, signal_exit_handler) + signal.signal(signal.SIGTERM, signal_exit_handler) + if args.Timeout is not None: + timeoutSec = args.Timeout if args.Timeout > 0 else 10 + secs = secs if secs > 0 else 1 + if argv is None: + signal.signal(signal.SIGALRM, signal_exit_handler) + signal.alarm(int(timeoutSec)) + else: + timer = threading.Timer(int(timeoutSec), exit_handler) + timer.start() + loop = True if secs > 0 else False + interval = secs if loop == True else 1 + if args.misc: + c = promiscClass(devname, args.util_thresh, args.iops_thresh, + args.bw_thresh, args.top, args.json, args.nodiskStat, + args.Pattern) + else: + if args.fs: + c = fsstatClass(devname, pid, args.util_thresh, args.bw_thresh, + args.top, args.json, args.nodiskStat, None, args.Pattern) + else: + c = iostatClass(devname, pid, args.util_thresh, args.iops_thresh, + args.bw_thresh, args.top, args.json, args.nodiskStat, None, + args.Pattern) + c.config() + while global_iofsstat_stop != True: + c.entry(interval) + if loop == False: + break + c.clear() + +def main(): + iofsstatStart(None) + +if __name__ == "__main__": + main() diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/iostatClass.py b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/iostatClass.py new file mode 100755 index 0000000000000000000000000000000000000000..e7d03e85dfdd7fe0499f838dd1f427dbf79461d4 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/iostatClass.py @@ -0,0 +1,244 @@ 
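# A minimal sketch of driving the iofsstat.py entry point above from another
# Python process instead of the shell (assumption: the iofstool directory is on
# sys.path; iofsstatStart() itself still requires root). Passing an explicit
# argv list makes it use a threading.Timer for -T instead of installing signal
# handlers, so it can be called from a host process such as ioMonitor.
from iofsstat import iofsstatStart

# one-second samples of vda, stop automatically after about 10 seconds
iofsstatStart(['-d', 'vda', '-c', '1', '-T', '10'])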
+#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import sys +import signal +import string +import time +import re +import json +from collections import OrderedDict +from diskstatClass import diskstatClass +from common import getDevtRegion +from common import humConvert,echoFile +from common import getContainerId + + +class iostatClass(diskstatClass): + def __init__( + self, devname, pid, utilThresh, iopsThresh, bwThresh, + top, json, nodiskStat, miscStat, Pattern): + super(iostatClass, self).__init__( + devname, utilThresh, json, nodiskStat, Pattern) + self.pid = pid + self.miscStat = miscStat + self.top = int(top) if top is not None else 99999999 + self.iopsThresh = int(iopsThresh) if iopsThresh is not None else 0 + self.bwThresh = int(bwThresh) if bwThresh is not None else 0 + self.devt = min(getDevtRegion(devname)) if devname is not None else -1 + self.tracingDir = "/sys/kernel/debug/tracing/instances/iofsstat4io" + self.blkTraceDir = self.tracingDir+"/events/block" + + def config(self): + devt = self.devt + if not os.path.exists(self.tracingDir): + os.mkdir(self.tracingDir) + if devt > 0: + echoFile(self.blkTraceDir+"/block_getrq/filter", "dev=="+str(devt)) + else: + echoFile(self.blkTraceDir+"/block_getrq/filter", "") + echoFile(self.blkTraceDir+"/block_getrq/enable", "1") + + def start(self): + echoFile(self.tracingDir+"/trace", "") + echoFile(self.tracingDir+"/tracing_on", "1") + super(iostatClass, self).start() + + def stop(self): + echoFile(self.tracingDir+"/tracing_on", "0") + super(iostatClass, self).stop() + + def clear(self): + echoFile(self.blkTraceDir+"/block_getrq/enable", "0") + if self.devt > 0: + echoFile(self.blkTraceDir+"/block_getrq/filter", "0") + super(iostatClass, self).clear() + + def showJson(self, stat): + secs = self.cycle + statJsonStr = '{"time":"","iostats":[]}' + iostatDicts = json.loads(statJsonStr, object_pairs_hook=OrderedDict) + iostatDicts['time'] = time.strftime( + '%Y/%m/%d %H:%M:%S', time.localtime()) + stSecs = str(secs)+'s' if secs > 1 else 's' + for key, item in stat.items(): + if (item["iops_rd"] + item["iops_wr"]) == 0: + continue + item["bps_rd"] = \ + humConvert(item["bps_rd"], True).replace('s', stSecs) if item["bps_rd"] else 0 + item["bps_wr"] = \ + humConvert(item["bps_wr"], True).replace('s', stSecs) if item["bps_wr"] else 0 + iostatJsonStr = '{\ + "comm":"","pid":0,"bps_rd":0,"iops_rd":0,"iops_wr":0,"bps_wr":0,"device":0}' + iostatDict = json.loads(iostatJsonStr, object_pairs_hook=OrderedDict) + for key in ['comm', 'pid', 'bps_rd', 'iops_rd', 'iops_wr', 'bps_wr', 'device']: + iostatDict[key] = item[key] + iostatDicts["iostats"].append(iostatDict) + if len(iostatDicts["iostats"]) > 0: + self.writeDataToJson(json.dumps(iostatDicts)) + + def patternIdx(self, size): + dp = [ + ("pat_W4K", (4*1024)), ("pat_W16K", (16*1024)), + ("pat_W32K", (32*1024)), ("pat_W64K",(64*1024)), + ("pat_W128K", (128*1024)), ("pat_W256K", (256*1024)), + ("pat_W512K", (512*1024))] + for d in dp: + if size <= d[1]: + return d[0] + return 'pat_Wlarge' + + def patternPercent(self, pat, total): + if total == 0 or pat == 0: + return '0' + return format(pat / (total * 1.0) * 100, '.2f') + '%' + + def show(self): + iopsTotal = 0 + WrIopsTotal = 0 + RdIopsTotal = 0 + bwTotal = 0 + WrBwTotal = 0 + RdBwTotal = 0 + stat = {} + mStat = {} + secs = self.cycle + with open(self.tracingDir+"/trace") as f: + traceText = list( + filter(lambda x: 'block_getrq' in x, f.readlines())) + # jbd2/vda1-8-358 ... 
: block_getrq: 253,0 WS 59098136 + 120 [jbd2/vda1-8] + for entry in traceText: + oneIO = entry.split() + matchObj = re.match( + r'(.*) \[([^\[\]]*)\] (.*) \[([^\[\]]*)\]\n', entry) + comm = matchObj.group(4) + pid = matchObj.group(1).rsplit('-', 1)[1].strip() + if self.pid is not None and int(pid) != self.pid: + continue + devinfo = oneIO[-6-comm.count(' ')].split(',') + dev = ((int(devinfo[0]) << 20) + int(devinfo[1])) + if str(dev) == '0': + continue + device = self.getDevNameByDevt(dev) + if device == '-' or self.notCareDevice(device) == True: + continue + if self.miscStat is not None: + if not mStat.has_key(device): + mStat.setdefault(device, {}) + stat = mStat[device] + iotype = oneIO[-5-comm.count(' ')] + sectors = oneIO[-2-comm.count(' ')] + task = str(pid)+':'+device + if bool(stat.has_key(task)) != True: + stat.setdefault(task, + {"comm":"", "pid": pid, "iops_rd": 0, + "iops_wr": 0, "bps_rd": 0, "bps_wr": 0, + "flushIO": 0, "device": device, + "cid":getContainerId(pid), + "pat_W4K":0, "pat_W16K":0, "pat_W32K":0, + "pat_W64K":0, "pat_W128K":0, "pat_W256K":0, + "pat_W512K":0, "pat_Wlarge":0}) + size = int(sectors) * 512 + if len(comm) > 0: + stat[task]["comm"] = comm + if 'R' in iotype: + stat[task]["iops_rd"] += 1 + stat[task]["bps_rd"] += size + bwTotal += size + iopsTotal += 1 + if 'W' in iotype: + stat[task]["iops_wr"] += 1 + stat[task]["bps_wr"] += size + bwTotal += size + iopsTotal += 1 + if self.Pattern and size > 0 and size < 1024 * 1024 * 100: + stat[task][self.patternIdx(size)] += 1 + if 'F' in iotype: + stat[task]["flushIO"] += 1 + + if self.iopsThresh or self.bwThresh: + if (self.bwThresh and bwTotal >= self.bwThresh) or \ + (self.iopsThresh and iopsTotal >= self.iopsThresh): + pass + else: + return + + if self.enableJsonShow() == False: + print(time.strftime('%Y/%m/%d %H:%M:%S', time.localtime())) + super(iostatClass, self).show() + + if self.miscStat is not None: + tmpStat = [] + for d,val in mStat.items(): + s = sorted(val.items(), + key=lambda e: (e[1]["bps_wr"]+e[1]["bps_rd"]), + reverse=True)[:self.top] + tmpStat.append((d, s)) + del self.miscStat[:] + self.miscStat.extend(tmpStat) + return + + stat = sorted(stat.items(), + key=lambda e: (e[1]["iops_rd"] + e[1]["iops_wr"]), + reverse=True)[:self.top] + + if self.enableJsonShow() == True: + self.showJson(stat) + return + + tPattern = '' + if self.Pattern: + WrIopsTotal = 0 + RdIopsTotal = 0 + WrBwTotal = 0 + RdBwTotal = 0 + tPattern = ('%-12s%-12s%-12s%-12s%-12s%-12s%-12s%-12s' % ( + "pat_W4K", "pat_W16K", "pat_W32K", "pat_W64K", "pat_W128K", + "pat_W256K", "pat_W512K", "pat_Wlarge" + )) + print('%-20s%-8s%-24s%-12s%-16s%-12s%-12s%-12s%s' % + ("comm", "pid", "cid", "iops_rd", "bps_rd", "iops_wr", "bps_wr", + "device", tPattern)) + stSecs = str(secs)+'s' if secs > 1 else 's' + for key, item in stat: + if (item["iops_rd"] + item["iops_wr"]) == 0: + continue + patPercent = '' + if self.Pattern: + WrIopsTotal += item["iops_wr"] + RdIopsTotal += item["iops_rd"] + WrBwTotal += item["bps_wr"] + RdBwTotal += item["bps_rd"] + patPercent = ('%-12s%-12s%-12s%-12s%-12s%-12s%-12s%-12s' % ( + self.patternPercent(item["pat_W4K"], item["iops_wr"]), + self.patternPercent(item["pat_W16K"], item["iops_wr"]), + self.patternPercent(item["pat_W32K"], item["iops_wr"]), + self.patternPercent(item["pat_W64K"], item["iops_wr"]), + self.patternPercent(item["pat_W128K"], item["iops_wr"]), + self.patternPercent(item["pat_W256K"], item["iops_wr"]), + self.patternPercent(item["pat_W512K"], item["iops_wr"]), + 
self.patternPercent(item["pat_Wlarge"], item["iops_wr"]) + )) + item["bps_rd"] = \ + humConvert(item["bps_rd"], True).replace('s', stSecs) if item["bps_rd"] else 0 + item["bps_wr"] = \ + humConvert(item["bps_wr"], True).replace('s', stSecs) if item["bps_wr"] else 0 + patPercent += item["cid"] + print('%-20s%-8s%-24s%-12s%-16s%-12s%-12s%-12s%s' % (item["comm"], + str(item["pid"]), item["cid"][0:20], str(item["iops_rd"]), + item["bps_rd"], str(item["iops_wr"]), item["bps_wr"], + item["device"], patPercent)) + if self.Pattern: + print('totalIops:%d(r:%d, w:%d), totalBw:%s(r:%s, w:%s)' % + (iopsTotal, RdIopsTotal, WrIopsTotal, + (humConvert(bwTotal, True).replace('s', stSecs) if bwTotal else 0), + (humConvert(RdBwTotal, True).replace('s', stSecs) if RdBwTotal else 0), + (humConvert(WrBwTotal, True).replace('s', stSecs) if WrBwTotal else 0))) + print("") + + def entry(self, interval): + self.start() + time.sleep(float(interval)) + self.stop() + self.show() diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/promiscClass.py b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/promiscClass.py new file mode 100755 index 0000000000000000000000000000000000000000..b3f90fc5ecaeb4861d5e77ee5c2092af254a5dda --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/iofstool/promiscClass.py @@ -0,0 +1,215 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import sys +import signal +import string +import time +import re +import json +from collections import OrderedDict +from common import humConvert +from iostatClass import iostatClass +from fsstatClass import fsstatClass + + +class promiscClass(): + def __init__( + self, devname, utilThresh, iopsThresh, bwThresh, top, json, + nodiskStat, Pattern): + self._iostat = [] + self._fsstat = [] + self.fs = fsstatClass(devname, None, utilThresh, bwThresh, + top, json, nodiskStat, self._fsstat, Pattern) + self.io = iostatClass(devname, None, utilThresh, iopsThresh, + bwThresh, top, json, nodiskStat, self._iostat, + Pattern) + + + def _selectKworker(self, iostat, fsItem, kworker): + select = None + largeFound = False + diff = sys.maxsize + for k in kworker: + fsRestBw = fsItem["bw_wr"] + if 'restBW' in fsItem.keys(): + fsRestBw = fsItem["restBW"] + if fsRestBw > (iostat[k]["bps_wr"] * 15) or \ + (fsRestBw * 50) < iostat[k]["bps_wr"]: + continue + d = abs(fsItem["bw_wr"] - iostat[k]["bps_wr"]) + diff = min(d, diff) + if iostat[k]["bps_wr"] > fsItem["bw_wr"]: + if not largeFound or diff == d: + select = k + largeFound = True + continue + if not largeFound and diff == d: + select = k + return select + + + def _addBioToKworker(self, iostat, kworker, fsItem): + repeated = False + k = self._selectKworker(iostat, fsItem, kworker) + if not k: + return False, 0 + if 'bufferio' not in iostat[k].keys(): + iostat[k].setdefault('bufferio', []) + task = fsItem["comm"]+':'+str(fsItem["tgid"])+':'+str(fsItem["pid"])+\ + ':'+fsItem["cid"][0:20] + bio = {'task': task, 'Wrbw': fsItem["bw_wr"], 'file': fsItem["file"], + 'device': fsItem["device"]} + for d in iostat[k]["bufferio"]: + if task == d['task'] and d['file'] == bio['file'] and \ + d['device'] == bio['device']: + d['Wrbw'] = max(d['Wrbw'], bio["Wrbw"]) + repeated = True + break + if not repeated: + iostat[k]["bufferio"].append(bio) + return True, iostat[k]["bps_wr"] + + + def _checkDeleteItem(self, addOK, costBW, item): + now = time.time() + # After 10 secs without adding to any kworker, we will delete the fsItem + agingTime = 10 + if 'restBW' not in item.keys(): + item.setdefault('restBW', 
item["bw_wr"]) + item.setdefault('startAging', now) + if addOK: + item["startAging"] = time.time() + item["restBW"] = item["restBW"] - costBW if addOK else item["restBW"] + if item["restBW"] <= 0 or (item["restBW"] < item["bw_wr"] and \ + (now - item["startAging"]) >= agingTime): + return True + return False + + + def _miscIostatFromFsstat(self): + fsstats = self._fsstat + iostats = dict(self._iostat) + for disk, fsItems in fsstats: + if disk not in iostats.keys(): + continue + rmList = [] + iostat = dict(iostats[disk]) + kworker = [key for key,val in iostat.items() if 'kworker' in val['comm']] + for key, item in fsItems: + taskI = item["pid"]+':'+disk + if taskI in iostat.keys(): + if 'file' not in iostat[taskI].keys(): + iostat[taskI].setdefault('file', []) + iostat[taskI]['cid'] = item['cid'] + iostat[taskI]["file"].append(item["file"]) + if item["bw_wr"] <= (iostat[taskI]["bps_wr"] * 15): + rmList.append((key, item)) + continue + if kworker: + if item["bw_wr"] < item["bw_rd"]: + rmList.append((key, item)) + continue + addOK,cost = self._addBioToKworker(iostat, kworker, item) + deleted = self._checkDeleteItem(addOK, cost, item) + if deleted: + rmList.append((key, item)) + for key, item in rmList: + fsItems.remove((key, item)) + iostats[disk] = iostat + return iostats + + + def _miscShowJson(self, iostats): + secs = self.io.cycle + statJsonStr = '{"time":"","mstats":[]}' + mstatDicts = json.loads(statJsonStr, object_pairs_hook=OrderedDict) + mstatDicts['time'] = time.strftime('%Y/%m/%d %H:%M:%S', time.localtime()) + stSecs = str(secs)+'s' if secs > 1 else 's' + + for key, item in iostats: + if (item["iops_rd"]+item["iops_wr"]) == 0 or (item["bps_rd"]+item["bps_wr"]) == 0: + continue + item["bps_rd"] = humConvert( + item["bps_rd"], True).replace('s', stSecs) if item["bps_rd"] else '0' + item["bps_wr"] = humConvert( + item["bps_wr"], True).replace('s', stSecs) if item["bps_wr"] else '0' + if 'file' not in item.keys(): + item.setdefault('file', '-') + if 'kworker' in item["comm"] and 'bufferio' in item.keys(): + for i in item["bufferio"]: + i["Wrbw"] = humConvert(i["Wrbw"], True).replace('s', stSecs) + mstatDicts["mstats"].append(item) + if len(mstatDicts["mstats"]) > 0: + self.io.writeDataToJson(json.dumps(mstatDicts)) + + + def miscShow(self): + secs = self.io.cycle + if not self._fsstat and not self._iostat: + return + + iostats = self._miscIostatFromFsstat() + if not iostats: + return + tmp = {} + for d in iostats.values(): + tmp.update(dict(d)) + iostats = sorted( + tmp.items(), + key=lambda e: (int(e[1]["bps_rd"])+int(e[1]["bps_wr"])), + reverse=True) + if self.io.enableJsonShow() == True: + self._miscShowJson(iostats) + return + + print('%-20s%-8s%-24s%-12s%-16s%-12s%-12s%-8s%s' % + ("comm", "pid", "cid", "iops_rd", "bps_rd", "iops_wr", "bps_wr", + "device", "file")) + stSecs = str(secs)+'s' if secs > 1 else 's' + for key, item in iostats: + if (item["iops_rd"]+item["iops_wr"]) == 0 or (item["bps_rd"]+item["bps_wr"]) == 0: + continue + item["bps_rd"] = humConvert( + item["bps_rd"], True).replace('s', stSecs) if item["bps_rd"] else '0' + item["bps_wr"] = humConvert( + item["bps_wr"], True).replace('s', stSecs) if item["bps_wr"] else '0' + file = str(item["file"]) if 'file' in item.keys() else '-' + print('%-20s%-8s%-24s%-12s%-16s%-12s%-12s%-8s%s' % + (item["comm"], str(item["pid"]), item["cid"][0:20], str(item["iops_rd"]), + item["bps_rd"], str(item["iops_wr"]), item["bps_wr"], item["device"], file)) + if 'kworker' in item["comm"] and 'bufferio' in item.keys(): + for i in 
item["bufferio"]: + i["Wrbw"] = humConvert(i["Wrbw"], True).replace('s', stSecs) + print(' |----%-32s WrBw:%-12s Device:%-8s File:%s' % + (i['task'], i["Wrbw"], i["device"], i["file"])) + print("") + + + def config(self): + self.fs.config() + self.io.config() + + def start(self): + self.clear() + self.fs.start() + self.io.start() + + def stop(self): + self.fs.stop() + self.io.stop() + + def clear(self): + del self._iostat[:] + + def show(self): + self.fs.show() + self.io.show() + self.miscShow() + + def entry(self, interval): + self.start() + time.sleep(float(interval)) + self.stop() + self.show() diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/iowaitstat/__init__.py b/source/tools/monitor/ioMonitor/ioMon/tools/iowaitstat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cb8e4b62acc8012add0dc0175a04b6b4f51940ec --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/iowaitstat/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +if __name__ == "__main__": + pass diff --git a/source/tools/monitor/ioMonitor/ioMon/tools/iowaitstat/iowaitstat.py b/source/tools/monitor/ioMonitor/ioMon/tools/iowaitstat/iowaitstat.py new file mode 100755 index 0000000000000000000000000000000000000000..69f473c87947f220f2376a1dba130d05eb1ef316 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMon/tools/iowaitstat/iowaitstat.py @@ -0,0 +1,354 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import sys +import signal +import string +import argparse +import time +import re +import json +import threading +from collections import OrderedDict +from subprocess import PIPE, Popen +import shlex + +global_iowaitstat_stop = False + + +def signal_exit_handler(signum, frame): + global global_iowaitstat_stop + global_iowaitstat_stop = True + +def exit_handler(): + global global_iowaitstat_stop + global_iowaitstat_stop = True + +def execCmd(cmd): + p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) + return p.stdout.read().decode('utf-8') + + +def getTgid(pid): + try: + with open("/proc/"+str(pid)+"/status") as f: + return ''.join(re.findall(r'Tgid:(.*)', f.read())).lstrip() + except IOError: + return '-' + return '-' + + +def fixComm(comm, pid): + try: + if ".." 
in comm: + with open("/proc/"+str(pid)+"/comm") as f: + return f.read().rstrip('\n') + except IOError: + return comm + return comm + + +def echoFile(filename, txt): + os.system("echo \""+txt+"\" > "+filename) + + +def echoFileAppend(filename, txt): + os.system("echo \""+txt+"\" >> "+filename) + + +def supportKprobe(name): + cmd = "cat /sys/kernel/debug/tracing/available_filter_functions | grep " + name + ss = execCmd(cmd).strip() + for res in ss.split('\n'): + if ':' in res: + res = res.split(":", 1)[1] + if ' [' in res: # for ko symbol + res = res.split(" [", 1)[0] + if res == name: + return True + return False + +class iowaitClass(): + def __init__(self, pid, cycle, top, json, iowait_thresh): + self.pid = pid + self.top = int(top) if top is not None else 99999999 + self.json = json + self.cycle = cycle + self.iowait_thresh = int(iowait_thresh) if iowait_thresh is not None else 0 + self.kprobeEvent = "/sys/kernel/debug/tracing/kprobe_events" + self.tracingDir = "/sys/kernel/debug/tracing/instances/iowait" + self.kprobeDir = self.tracingDir+"/events/kprobes" + self.expression = [] + self.kprobe = [] + self.cpuStatIowait = {'sum': 0, 'iowait': 0} + if json: + self.fJson = open(json, 'w+') + + for kprobe,retProbe in {'io_schedule_timeout':True, 'io_schedule':True}.items(): + if supportKprobe(kprobe) == False: + print("not available %s kprobe" % kprobe) + continue + self.expression.append('p:p_%s_0 %s' % (kprobe, kprobe)) + self.kprobe.append('p_%s_0' % kprobe) + if retProbe == True: + self.expression.append('r:r_%s_0 %s' % (kprobe, kprobe)) + self.kprobe.append('r_%s_0' % kprobe) + if len(self.kprobe) == 0: + print "not available kprobe" + sys.exit(0) + + def config(self): + if not os.path.exists(self.tracingDir): + os.mkdir(self.tracingDir) + for exp in self.expression: + probe = exp.split()[0].split(':')[1] + enableKprobe = self.kprobeDir+"/"+probe+"/enable" + if os.path.exists(enableKprobe): + echoFile(enableKprobe, "0") + echoFileAppend(self.kprobeEvent, '-:%s' % probe) + + echoFileAppend(self.kprobeEvent, exp) + echoFile(enableKprobe, "1") + + def start(self): + echoFile(self.tracingDir+"/trace", "") + echoFile(self.tracingDir+"/tracing_on", "1") + with open("/proc/stat") as fStat: + cpuStatList = map(long, fStat.readline().split()[1:]) + self.cpuStatIowait['sum'] = sum(cpuStatList) + self.cpuStatIowait['iowait'] = cpuStatList[4] + + def stop(self): + echoFile(self.tracingDir+"/tracing_on", "0") + + def clear(self): + for exp in self.expression: + probe = exp.split()[0].split(':')[1] + enableKprobe = self.kprobeDir+"/"+probe+"/enable" + if os.path.exists(enableKprobe): + echoFile(enableKprobe, "0") + echoFileAppend(self.kprobeEvent, '-:%s' % probe) + if self.json: + self.fJson.close() + + def writeDataToJson(self, data): + self.fJson.write(data+'\n') + + def showJson(self, stat, totalTimeout, gloabIowait): + top = 0 + statJsonStr = '{"time":"", "global iowait":0,"iowait":[]}' + iowaitStatDicts = json.loads(statJsonStr, object_pairs_hook=OrderedDict) + iowaitStatDicts['time'] = time.strftime('%Y/%m/%d %H:%M:%S', time.localtime()) + iowaitStatDicts['global iowait'] = gloabIowait + for pid, item in stat.items(): + if item["timeout"] == 0: + continue + if top >= self.top: + break + top += 1 + iowait = str(round(item["timeout"] / totalTimeout * gloabIowait, 2)) + item["timeout"] = str(round(item["timeout"]*1000, 3)) + reason = '' + maxCnt = 0 + for key, val in item['reason'].items(): + if 'balance_dirty' in key: + reason = 'Too many dirty pages' + break + elif 'blk_mq_get_tag' in key: + 
reason = 'Device queue full' + break + elif 'get_request' in key: + reason = 'Ioscheduler queue full' + break + else: + if val > maxCnt: + reason = 'Unkown[stacktrace:'+key.replace('<-', '->')+']' + maxCnt = val + iowaitStatJsonStr = '{"comm":"","pid":0,"tgid":0,"timeout":0,"iowait":0,"reason":0}' + iowaitStatDict = json.loads( + iowaitStatJsonStr, object_pairs_hook=OrderedDict) + iowaitStatDict["comm"] = item["comm"] + iowaitStatDict["pid"] = pid + iowaitStatDict["tgid"] = item["tgid"] + iowaitStatDict["timeout"] = item["timeout"] + iowaitStatDict["iowait"] = iowait + iowaitStatDict["reason"] = reason + iowaitStatDicts["iowait"].append(iowaitStatDict) + if len(iowaitStatDicts["iowait"]) > 0: + self.writeDataToJson(json.dumps(iowaitStatDicts)) + + def show(self): + top = 0 + totalTimeout = 0 + stat = {} + secs = self.cycle + traceText = [] + + with open("/proc/stat") as fStat: + statList = map(long, fStat.readline().split()[1:]) + gloabIowait = float(format( + (statList[4]-self.cpuStatIowait['iowait'])*100.0 / + (sum(statList)-self.cpuStatIowait['sum']), '.2f')) + if gloabIowait < self.iowait_thresh: + return + + with open(self.tracingDir+"/trace") as f: + traceLoglist = list(filter(lambda x: any(e in x for e in self.kprobe), f.readlines())) + traceText = traceLoglist + + # jbd2/vda2-8-605 [001] .... 38890020.539912: p_io_schedule_0: (io_schedule+0x0/0x40) + # jbd2/vda2-8-605 [002] d... 38890020.540633: r_io_schedule_0: (bit_wait_io+0xd/0x50 <- io_schedule) + # <...>-130620 [002] .... 38891029.116442: p_io_schedule_timeout_0: (io_schedule_timeout+0x0/0x40) + # <...>-130620 [002] d... 38891029.123657: r_io_schedule_timeout_0: (balance_dirty_pages+0x270/0xc60 <- io_schedule_timeout) + for entry in traceText: + matchObj = re.match(r'(.*) \[([^\[\]]*)\] (.*) (.*): (.*): (.*)\n', entry) + if matchObj is None: + continue + commInfo = matchObj.group(1).rsplit('-', 1) + pid = commInfo[1].strip() + if self.pid is not None and pid != self.pid: + continue + if bool(stat.has_key(pid)) != True: + comm = fixComm(commInfo[0].lstrip(), pid) + if '..' 
in comm: + continue + stat.setdefault(pid, + {"comm": comm, "tgid": getTgid(pid), + "timeout": 0, "reason": {}, "entry": []}) + stat[pid]["entry"].append({ + 'time':matchObj.group(4), + 'point':matchObj.group(5), + 'trace':matchObj.group(6)}) + + if stat: + for key,item in stat.items(): + item["entry"] = sorted(item["entry"], key=lambda e: float(e["time"]), reverse=False) + count = 0 + startT = 0 + for entry in item["entry"]: + count += 1 + if (count % 2 != 0 and 'p_' not in entry['point']) or \ + (count % 2 == 0 and 'r_' not in entry['point']): + count = 0 + startT = 0 + continue + + if count % 2 != 0: + startT = float(entry['time']) + continue + + if startT > 0 and float(entry['time']) > startT: + if re.split('[(,+]', entry['trace'])[1] in re.split('[-,)]', entry['trace'])[1]: + count = 0 + startT = 0 + continue + item['timeout'] += (float(entry['time']) - startT) + totalTimeout += (float(entry['time']) - startT) + startT = 0 + if entry['trace'] not in item['reason'].keys(): + item['reason'].setdefault(entry['trace'], 0) + item['reason'][entry['trace']] += 1 + + if stat: + stat = OrderedDict(sorted(stat.items(), key=lambda e: e[1]["timeout"], reverse=True)) + if self.json: + self.showJson(stat, totalTimeout, gloabIowait) + return + else: + head = str(time.strftime('%Y/%m/%d %H:%M:%S', time.localtime()))+' -> global iowait%: '+str(gloabIowait) + print head + + print("%-32s%-8s%-8s%-16s%-12s%s" % ("comm", "tgid", "pid", "waitio(ms)", "iowait(%)", "reasons")) + for pid, item in stat.items(): + if item["timeout"] == 0: + continue + if top >= self.top: + break + top += 1 + iowait = str(round(item["timeout"] / totalTimeout * gloabIowait, 2)) + item["timeout"] = str(round(item["timeout"]*1000, 3)) + reason = '' + maxCnt = 0 + for key, val in item['reason'].items(): + if 'balance_dirty' in key: + reason = 'Too many dirty pages' + break + elif 'blk_mq_get_tag' in key: + reason = 'Device queue full' + break + elif 'get_request' in key: + reason = 'Ioscheduler queue full' + break + else: + if val > maxCnt: + reason = 'Unkown[stacktrace:'+key.replace('<-', '->')+']' + maxCnt = val + print("%-32s%-8s%-8s%-16s%-12s%s" + % (item["comm"], item["tgid"], pid, item["timeout"], iowait, str(reason))) + print("") + + def entry(self, interval): + self.start() + time.sleep(float(interval)) + self.stop() + self.show() + +def iowaitstatStart(argv): + global global_iowaitstat_stop + global_iowaitstat_stop = False + if os.geteuid() != 0: + print("%s" % ("This program must be run as root. Aborting.")) + sys.exit(0) + examples = """e.g. 
+ ./iowaitstat.py + Report iowait for tasks + ./iowaitstat.py -c 1 + Report iowait for tasks per secs + ./iowaitstat.py -p [PID] -c 1 + Report iowait for task with [PID] per 1secs + """ + parser = argparse.ArgumentParser( + description="Report iowait for tasks.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) + parser.add_argument('-p', '--pid', help='Specify the process id.') + parser.add_argument('-T', '--Timeout', + help='Specify the timeout for program exit(secs).') + parser.add_argument( + '-t', '--top', help='Report the TopN with the largest iowait.') + parser.add_argument('-c', '--cycle', help='Specify refresh cycle(secs).') + parser.add_argument('-j', '--json', help='Specify the json-format output.') + parser.add_argument('-w','--iowait_thresh', help='Specify the iowait-thresh to report.') + args = parser.parse_args(argv) if argv else parser.parse_args() + + pid = int(args.pid) if args.pid else None + secs = float(args.cycle) if args.cycle is not None else 0 + if argv is None: + signal.signal(signal.SIGINT, signal_exit_handler) + signal.signal(signal.SIGHUP, signal_exit_handler) + signal.signal(signal.SIGTERM, signal_exit_handler) + if args.Timeout is not None: + timeoutSec = args.Timeout if args.Timeout > 0 else 10 + secs = secs if secs > 0 else 1 + if argv is None: + signal.signal(signal.SIGALRM, signal_exit_handler) + signal.alarm(int(timeoutSec)) + else: + timer = threading.Timer(int(timeoutSec), exit_handler) + timer.start() + loop = True if secs > 0 else False + c = iowaitClass(pid, secs, args.top, args.json, args.iowait_thresh) + c.config() + interval = secs if loop == True else 1 + while global_iowaitstat_stop != True: + c.entry(interval) + if loop == False: + break + c.clear() + +def main(): + iowaitstatStart(None) + +if __name__ == "__main__": + main() diff --git a/source/tools/monitor/ioMonitor/ioMonitor.sh b/source/tools/monitor/ioMonitor/ioMonitor.sh new file mode 100755 index 0000000000000000000000000000000000000000..dfd3d4689472b7715c3cbce077915fa054ee7f88 --- /dev/null +++ b/source/tools/monitor/ioMonitor/ioMonitor.sh @@ -0,0 +1,11 @@ +#!/bin/sh +#****************************************************************# +# ScriptName: ioMonitor.sh +# Author: $SHTERM_REAL_USER@alibaba-inc.com +# Create Date: 2021-06-06 16:53 +# Modify Author: $SHTERM_REAL_USER@alibaba-inc.com +# Modify Date: 2021-06-06 16:53 +# Function: +#***************************************************************# +TOOLS_ROOT="$SYSAK_WORK_PATH/tools" +python $TOOLS_ROOT/ioMon/ioMonitorMain.py $* diff --git a/source/tools/monitor/unity/Dockerfile b/source/tools/monitor/unity/Dockerfile index dbaf30641e3483f49d8f4606c25ef0940ec7af82..c60554cb71280dc462401e4c556b063a08a4f257 100644 --- a/source/tools/monitor/unity/Dockerfile +++ b/source/tools/monitor/unity/Dockerfile @@ -2,7 +2,7 @@ FROM registry.cn-hangzhou.aliyuncs.com/sysom/lcc MAINTAINER "liaozhaoyan " WORKDIR /root/ RUN source /opt/rh/devtoolset-9/enable && \ - yum install -y make wget lua-devel unzip git && \ + yum install -y make wget lua-devel unzip git numactl-devel m4 && \ mkdir /root/build && \ cd /root/build && \ git clone https://gitee.com/chuyansz/sysak.git && \ @@ -33,6 +33,7 @@ RUN source /opt/rh/devtoolset-9/enable && \ luarocks install sha1 && \ luarocks install md5 && \ luarocks install luaposix 35.1-1 && \ + luarocks install http && \ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/ && \ cd ../beeQ/ && \ make \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/beaver.c 
b/source/tools/monitor/unity/beaver/beaver.c index 293cbb4b36895da35d203cad7717dd5dc29709da..9885f0567b3893d087c3fe89c49caf169e20fa16 100644 --- a/source/tools/monitor/unity/beaver/beaver.c +++ b/source/tools/monitor/unity/beaver/beaver.c @@ -12,22 +12,19 @@ #include #include -LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, int level); +extern int lua_reg_errFunc(lua_State *L); +extern int lua_check_ret(int ret); +int lua_load_do_file(lua_State *L, const char* path); -static void report_lua_failed(lua_State *L) { - fprintf(stderr, "\nFATAL ERROR:%s\n\n", lua_tostring(L, -1)); -} - -static int call_init(lua_State *L, char *fYaml) { +static int call_init(lua_State *L, int err_func, char *fYaml) { int ret; lua_Number lret; lua_getglobal(L, "init"); lua_pushstring(L, fYaml); - ret = lua_pcall(L, 1, 1, 0); + ret = lua_pcall(L, 1, 1, err_func); if (ret) { - perror("luaL_call init func error"); - report_lua_failed(L); + lua_check_ret(ret); goto endCall; } @@ -67,6 +64,7 @@ void LuaAddPath(lua_State *L, char *name, char *value) { static lua_State * echos_init(char *fYaml) { int ret; + int err_func; /* create a state and load standard library. */ lua_State *L = luaL_newstate(); @@ -77,22 +75,15 @@ static lua_State * echos_init(char *fYaml) { /* opens all standard Lua libraries into the given state. */ luaL_openlibs(L); - LuaAddPath(L, "path", "../beaver/?.lua"); + err_func = lua_reg_errFunc(L); - ret = luaL_loadfile(L, "../beaver/beaver.lua"); - ret = lua_pcall(L, 0, LUA_MULTRET, 0); + ret = lua_load_do_file(L, "../beaver/beaver.lua"); if (ret) { - const char *msg = lua_tostring(L, -1); - perror("luaL_dofile error"); - if (msg) { - luaL_traceback(L, L, msg, 0); - fprintf(stderr, "FATAL ERROR:%s\n\n", msg); - } goto endLoad; } - ret = call_init(L, fYaml); + ret = call_init(L, err_func, fYaml); if (ret < 0) { goto endCall; } @@ -107,13 +98,14 @@ static lua_State * echos_init(char *fYaml) { static int echos(lua_State *L) { int ret; + int err_func; lua_Number lret; + err_func = lua_gettop(L); lua_getglobal(L, "echo"); - ret = lua_pcall(L, 0, 1, 0); + ret = lua_pcall(L, 0, 1, err_func); if (ret) { - perror("lua call error"); - report_lua_failed(L); + lua_check_ret(ret); goto endCall; } @@ -147,7 +139,6 @@ int beaver_init(char *fYaml) { } ret = echos(L); lua_close(L); - sleep(5); // to release port } exit(1); } diff --git a/source/tools/monitor/unity/beaver/beaver.lua b/source/tools/monitor/unity/beaver/beaver.lua index 5774a8d14683abd14cc4d903c6a8a0fc62797145..2431d99ce7bcc5069490f8d0d3510e55df6a4da3 100644 --- a/source/tools/monitor/unity/beaver/beaver.lua +++ b/source/tools/monitor/unity/beaver/beaver.lua @@ -15,6 +15,7 @@ local CurlGuide = require("beaver.url_guide") local CurlExportHtml = require("beaver.url_export_html") local CurlExportRaw = require("beaver.url_export_raw") local CLocalBeaver = require("beaver.localBeaver") +local CbaseQuery = require("beaver.query.baseQuery") local lb = nil @@ -24,9 +25,10 @@ function init(fYaml) local web = Cframe.new() CurlIndex.new(web) - CurlApi.new(web) + CurlApi.new(web, fYaml) CurlRpc.new(web) CurlGuide.new(web) + CbaseQuery.new(web, fYaml) local Cidentity = require("beaver.identity") local inst = Cidentity.new(fYaml) diff --git a/source/tools/monitor/unity/beaver/export.lua b/source/tools/monitor/unity/beaver/export.lua index e1436c60c8c6a2f51227d5cba9ed833fc26f434e..5a97c6f3f968bdc1964fa9ac42aaae39ad066cb5 100644 --- a/source/tools/monitor/unity/beaver/export.lua +++ b/source/tools/monitor/unity/beaver/export.lua @@ -11,24 
+11,18 @@ require("common.class") local Cexport = class("Cexport") -function Cexport:_init_(instance, fYaml) - self._instance = instance - local ms = system:parseYaml(fYaml) - self._freq = ms.config.freq - self._tDescr = ms.metrics - self._fox = CfoxTSDB.new() - self._fox:_setupRead() -end local function qFormData(from, tData) local res = {} local len = #tData local last = 0 + local c = 0 for i = len, 1, -1 do local line = tData[i] if from == line.title then if last == 0 or last == line.time then - table.insert(res, line) + c = c + 1 + res[c] = line last = line.time else break @@ -38,7 +32,7 @@ local function qFormData(from, tData) return res end -local function packLine(title, ls, v) +local function packLine_us(title, ls, v, time) local tLs = {} for k, v in pairs(ls) do table.insert(tLs, string.format("%s=\"%s\"", k , v)) @@ -48,23 +42,56 @@ local function packLine(title, ls, v) label = pystring:join(",", tLs) label = "{" .. label .. "}" end + return string.format("%s%s %.1f %d", title, label, v, time/1000) +end + +local function packLine(title, ls, v) + local tLs = {} + local c = 0 + for k, v in pairs(ls) do + c = c + 1 + tLs[c] = string.format("%s=\"%s\"", k , v) + end + local label = "" + if #tLs then + label = pystring:join(",", tLs) + label = "{" .. label .. "}" + end return string.format("%s%s %.1f", title, label, v) end +function Cexport:_init_(instance, fYaml) + self._instance = instance + local ms = system:parseYaml(fYaml) + self._freq = ms.config.freq + self._timestamps = ms.config.real_timestamps + if self._timestamps == true then + self.pack_line = packLine_us + else + self.pack_line = packLine + end + self._tDescr = ms.metrics + self._fox = CfoxTSDB.new(fYaml) + self._fox:_setupRead() +end + function Cexport:export() local qs = {} self._fox:resize() self._fox:qlast(self._freq, qs) local res = {} + local c = 0 for _, line in ipairs(self._tDescr) do local from = line.from local tFroms = qFormData(from, qs) if #tFroms then local title = line.title local help = string.format("# HELP %s %s", title, line.help) - table.insert(res, help) + c = c + 1 + res[c] = help local sType = string.format("# TYPE %s %s", title, line.type) - table.insert(res, sType) + c = c + 1 + res[c] = sType for _, tFrom in ipairs(tFroms) do local labels = system:deepcopy(tFrom.labels) @@ -74,11 +101,14 @@ function Cexport:export() labels.instance = self._instance for k, v in pairs(tFrom.values) do labels[line.head] = k - table.insert(res, packLine(title, labels, v)) + c = c + 1 + res[c] = self.pack_line(title, labels, v, tFrom.time) end end end end + c = c + 1 + res[c] = "" local lines = pystring:join("\n", res) return lines end diff --git a/source/tools/monitor/unity/beaver/frame.lua b/source/tools/monitor/unity/beaver/frame.lua index 0c243edd09cf599b54b9ddd2ab20b4b79e022402..2f64132c91958a7e68f971deb97ae60f9ccec9fe 100644 --- a/source/tools/monitor/unity/beaver/frame.lua +++ b/source/tools/monitor/unity/beaver/frame.lua @@ -6,27 +6,28 @@ -- refer to https://blog.csdn.net/zx_emily/article/details/83024065 -local unistd = require("posix.unistd") -local poll = require("posix.poll") - require("common.class") local ChttpComm = require("httplib.httpComm") local pystring = require("common.pystring") +local system = require("common.system") local Cframe = class("frame", ChttpComm) function Cframe:_init_() ChttpComm._init_(self) self._objs = {} + self._obj_res = {} end local function waitDataRest(fread, rest, tReq) local len = 0 local tStream = {tReq.data} + local c = #tStream while len < rest do local s = fread() if 
s then len = len + #s - table.insert(tStream, s) + c = c + 1 + tStream[c] = s else return -1 end @@ -123,7 +124,15 @@ function Cframe:echo404() return pystring:join("\r\n", tHttp) end -function Cframe:proc(fread) +function Cframe:findObjRes(path) + for k, v in pairs(self._obj_res) do + if string.find(path, k) then + return v + end + end +end + +function Cframe:proc(fread, session) local stream = waitHttpHead(fread) if stream == nil then -- read return stream or error code or nil return nil @@ -131,17 +140,20 @@ function Cframe:proc(fread) local tReq = self:parse(fread, stream) if tReq then + tReq.session = session if self._objs[tReq.path] then local obj = self._objs[tReq.path] local res, keep = obj:call(tReq) - return res, keep - else - print("show all path.") - for k, _ in pairs(self._objs) do - print("path:", k) - end - return self:echo404(), false + return res, keep, tReq.session + end + + local obj = self:findObjRes(tReq.path) + if obj then + local res, keep = obj:calls(tReq) + return res, keep, tReq.session end + + return self:echo404(), false, {} end end @@ -150,4 +162,9 @@ function Cframe:register(path, obj) self._objs[path] = obj end +function Cframe:registerRe(path, obj) + assert(self._obj_res[path] == nil, "the " .. path .. " is already registered.") + self._obj_res[path] = obj +end + return Cframe diff --git a/source/tools/monitor/unity/beaver/guide/bpf.md b/source/tools/monitor/unity/beaver/guide/bpf.md index 81b67b622aa772e25ad6b4412c4d342eb67bfb97..9edada61b147fa8193c70ab36f45e65d4f5473d7 100644 --- a/source/tools/monitor/unity/beaver/guide/bpf.md +++ b/source/tools/monitor/unity/beaver/guide/bpf.md @@ -1,13 +1,13 @@ -## 基于 eBPF 的监控开发手册 +## 基于 eBPF 的周期性采样监控开发手册 -我们在 `source/tools/monitor/unity/collector/plugin/bpfsample2` 路径提供了一个基于 eBPF 的监控开发样例。其主要包含三个部分: +我们在 `source/tools/monitor/unity/collector/plugin/bpfsample` 路径提供了一个基于 eBPF 的周期性采样监控开发样例。其主要包含三个部分: 1. Makefile: 用于编译该工具; -2. bpfsample2.bpf.c: 此处编写 eBPF 程序 -3. bpfsmaple2.c: 此处编写用户态程序 +2. bpfsample.bpf.c: 此处编写 eBPF 程序 +3. bpfsmaple.c: 此处编写用户态程序 接下分别介绍这三个部分。 @@ -16,9 +16,9 @@ ```Makefile newdirs := $(shell find ./ -type d) -bpfsrcs := bpfsample2.bpf.c -csrcs := bpfsample2.c -so := libbpfsample2.so +bpfsrcs := bpfsample.bpf.c +csrcs := bpfsample.c +so := libbpfsample.so include ../bpfso.mk ``` @@ -30,36 +30,32 @@ include ../bpfso.mk 开发者只需要关注上述三个变量的修改即可。 -### bpfsample2.bpf.c: eBPF 程序的编写 +### bpfsample.bpf.c: eBPF 程序的编写 ```c #include #include -#include "bpfsample2.h" +#include "bpfsample.h" -BPF_PERF_OUTPUT(perf, 1024); +BPF_ARRAY(count, u64, 200); SEC("kprobe/netstat_seq_show") int BPF_KPROBE(netstat_seq_show, struct sock *sk, struct msghdr *msg, size_t size) { - struct event e = {}; - - e.ns = ns(); - e.cpu = cpu(); - e.pid = pid(); - comm(e.comm); - - bpf_perf_event_output(ctx, &perf, BPF_F_CURRENT_CPU, &e, sizeof(struct event)); + int default_key = 0; + u64 *value = bpf_map_lookup_elem(&count, &default_key); + if (value) { + __sync_fetch_and_add(value, 1); + } return 0; } - ``` -1. `vmlinux.h` 和 `coolbpf.h` 是coolbpf框架提供的两个头文件,里面包含了类似 `BPF_PERF_OUTPUT` 的helper函数,以及内核结构体的定义 -2. `bpfsample2.h` 是开发者自定义的头文件 +1. `vmlinux.h` 和 `coolbpf.h` 是coolbpf框架提供的两个头文件,里面包含了类似 `BPF_ARRAY` 的helper函数,以及内核结构体的定义 +2. 
`bpfsample.h` 是开发者自定义的头文件 -### bpfsample2.c: 用户态程序的编写 +### bpfsample.c: 用户态程序的编写 unity 监控框架提供了三个函数,分别是: @@ -79,22 +75,45 @@ void deinit(void) } ``` -在 `init` 函数里,需要去 load, attach eBPF程序,如有需要可能还会创建用于接收perf事件的线程。为了开发方便,coolbpf提供了简单的宏定义去完成这一系列的操作,即 `LOAD_SKEL_OBJECT(skel_name, perf);` 。因此,一般 `init` 函数具体形式如下: +在 `init` 函数里,需要去 load, attach eBPF程序。为了开发方便,coolbpf提供了简单的宏定义去完成这一系列的操作,即 `LOAD_SKEL_OBJECT(skel_name);` 。因此,一般 `init` 函数具体形式如下: ```c int init(void *arg) { - return LOAD_SKEL_OBJECT(bpf_sample2, perf);; + return LOAD_SKEL_OBJECT(bpf_sample); +} +``` + +对于 `call` 函数,我们需要周期性去读取 `map` 数据。本样例,在 `call` 函数读取 `count` map里面的数据,去统计事件触发的频次。 + + +```c +int call(int t, struct unity_lines *lines) +{ + int countfd = bpf_map__fd(bpfsample->maps.count); + int default_key = 0; + uint64_t count = 0; + uint64_t default_count = 0; + struct unity_line* line; + + bpf_map_lookup_elem(countfd, &default_key, &count); + bpf_map_update_elem(countfd, &default_key, &default_count, BPF_ANY); + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + unity_set_table(line, "bpfsample"); + unity_set_value(line, 0, "value", count); + + return 0; } ``` -对于 `call` 函数,我们保持不变,即直接 `return 0`。 对于 `deinit` 函数,同 `init` 函数里提供的 `LOAD_SKEL_OBJECT` 宏定义一样,我们也提供了类似的销毁宏定义,即:`DESTORY_SKEL_BOJECT`。 因此,一般 `deinit` 函数具体形式如下: ```c int deinit(void *arg) { - return DESTORY_SKEL_BOJECT(bpf_sample2); + return DESTORY_SKEL_BOJECT(bpf_sample); } ``` \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/bpf_perf.md b/source/tools/monitor/unity/beaver/guide/bpf_perf.md new file mode 100644 index 0000000000000000000000000000000000000000..2614c60c4401f9498bbceda764cd8d0e0f0fd362 --- /dev/null +++ b/source/tools/monitor/unity/beaver/guide/bpf_perf.md @@ -0,0 +1,100 @@ + + +## 基于 eBPF 的事件监控开发手册 + + +我们在 `source/tools/monitor/unity/collector/plugin/bpfsample2` 路径提供了一个基于 eBPF 的监控开发样例。其主要包含三个部分: + +1. Makefile: 用于编译该工具; +2. bpfsample2.bpf.c: 此处编写 eBPF 程序 +3. bpfsmaple2.c: 此处编写用户态程序 + +接下分别介绍这三个部分。 + +### Makfile + +```Makefile +newdirs := $(shell find ./ -type d) + +bpfsrcs := bpfsample2.bpf.c +csrcs := bpfsample2.c +so := libbpfsample2.so + +include ../bpfso.mk +``` + +1. `bpfsrcs`: 用来指定需要编译的 eBPF 程序源文件 +2. `csrcs`: 用来指定需要编译的用户态程序源文件 +3. `so`: 用来指定生成目标动态库名称 + +开发者只需要关注上述三个变量的修改即可。 + + +### bpfsample2.bpf.c: eBPF 程序的编写 + +```c +#include +#include +#include "bpfsample2.h" + +BPF_PERF_OUTPUT(perf, 1024); + +SEC("kprobe/netstat_seq_show") +int BPF_KPROBE(netstat_seq_show, struct sock *sk, struct msghdr *msg, size_t size) +{ + struct event e = {}; + + e.ns = ns(); + e.cpu = cpu(); + e.pid = pid(); + comm(e.comm); + + bpf_perf_event_output(ctx, &perf, BPF_F_CURRENT_CPU, &e, sizeof(struct event)); + return 0; +} + +``` + +1. `vmlinux.h` 和 `coolbpf.h` 是coolbpf框架提供的两个头文件,里面包含了类似 `BPF_PERF_OUTPUT` 的helper函数,以及内核结构体的定义 +2. 
`bpfsample2.h` 是开发者自定义的头文件 + + +### bpfsample2.c: 用户态程序的编写 + +unity 监控框架提供了三个函数,分别是: + +```c +int init(void *arg) +{ + return 0; +} + +int call(int t, struct unity_lines *lines) +{ + return 0; +} + +void deinit(void) +{ +} +``` + +在 `init` 函数里,需要去 load, attach eBPF程序,如有需要可能还会创建用于接收perf事件的线程。为了开发方便,coolbpf提供了简单的宏定义去完成这一系列的操作,即 `LOAD_SKEL_OBJECT(skel_name, perf);` 。因此,一般 `init` 函数具体形式如下: + +```c +int init(void *arg) +{ + return LOAD_SKEL_OBJECT(bpf_sample2, perf);; +} +``` + +对于 `call` 函数,我们保持不变,即直接 `return 0`。 + +对于 `deinit` 函数,同 `init` 函数里提供的 `LOAD_SKEL_OBJECT` 宏定义一样,我们也提供了类似的销毁宏定义,即:`DESTORY_SKEL_BOJECT`。 因此,一般 `deinit` 函数具体形式如下: + +```c +int deinit(void *arg) +{ + return DESTORY_SKEL_BOJECT(bpf_sample2); +} +``` \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/dev.md b/source/tools/monitor/unity/beaver/guide/dev.md new file mode 100644 index 0000000000000000000000000000000000000000..5868f3a479e25b63ff871d960551d20cc0eb0ed7 --- /dev/null +++ b/source/tools/monitor/unity/beaver/guide/dev.md @@ -0,0 +1,9 @@ +245 上开发 + +``` +docker pull registry.cn-hangzhou.aliyuncs.com/sysom/unity:v1.1 +docker run --net=host --privileged=true -v /:/mnt/host:ro --name unity -it -d registry.cn-hangzhou.aliyuncs.com/sysom/unity:v1.1 /bin/bash +docker exec -it unity bash +cd build/sysak/source/tools/monitor/unity/test/bees/ +./run.sh +``` \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/dev_proc.md b/source/tools/monitor/unity/beaver/guide/dev_proc.md index 3c006d0dfb0e7bca223ec63064099788ffe118e1..b21b886396687cebc482298f38717f879a80677e 100644 --- a/source/tools/monitor/unity/beaver/guide/dev_proc.md +++ b/source/tools/monitor/unity/beaver/guide/dev_proc.md @@ -304,4 +304,4 @@ return CkvProc -[返回目录](/guide) \ No newline at end of file +[返回目录](/guide/guide.md) \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/develop.md b/source/tools/monitor/unity/beaver/guide/develop.md new file mode 100644 index 0000000000000000000000000000000000000000..2a2feff70097a6fefd319e8e07390cf958be3657 --- /dev/null +++ b/source/tools/monitor/unity/beaver/guide/develop.md @@ -0,0 +1,680 @@ +# 1、unity 监控框架概述 + +unity 监控框架以插件化开发为主,支持coolbpf 应用,以及多种数据发布方式。具有配置灵活,资源占用率低等优点,适合在服务器监控等领域部署。 + +![frame](image/frame.png) + +# 2、开发构建流程 + +## 2.1 clone 代码 + +开发机可以访问gitee和registry.cn-hangzhou.aliyuncs.com,并且已经安装了docker 和 git。 + +``` +git clone -b unity https://gitee.com/anolis/sysak.git +``` + +## 2.2 拉起容器 + +``` +docker run -v /root/1ext/code/:/root/code -v /:/mnt/host:ro --net=host --pid=host --name unity --privileged -itd registry.cn-hangzhou.aliyuncs.com/sysom/sysom:v1.0 /bin/sh +``` + +docker 参数说明: + +* /root/1ext/code/:/root/code -> 将代码目录挂载到容器目录下,方便代码同步 +* /:/mnt/host:ro/:/mnt/host:ro ->将host根目录的以只读方式挂载进来 +* /var/run/docker.sock:/var/run/docker.sock -> 挂载host 侧的docker 接口,可以根据自己开发机的实际情况进行选择 +* --name unity docker 名,可以自定义命名 +* --privileged 特权容器模式,如果要在容器里面进行调试,该选项不能省略 + +启动编译 + +``` +./configure --enable-libbpf --enable-target-unity + make +``` + +编译后在 sysak/out/.sysak_components/tools/dist 目录下会生成目标包文件。 + +## 2.3 准备 plugin.yaml 配置文件 + +unity 监控启动脚本默认会从 /etc/sysak/plugin.yaml 读取配置。典型的配置表说明: + +``` +config: + freq: 20 # 采集间隔 + port: 8400 # 监听端口 + bind_addr: 0.0.0.0 # 监听ip + backlog: 32 # 服务监听对队列长度, + identity: # 实例id配置模式,当前支持以下五种模式 + # hostip: 获取主机IP + # curl: 通过网络请求获取,需要指定url 参数,适合ECS场景 + # file: 从文件读取,需要指定path 参数 + # specify: 指定id,需要指定name参数 + mode: specify + name: test_specify + real_timestamps: true #上报监测数据的真实时间,默认关闭 + unix_socket: 
"/tmp/sysom_unity.sock" #通过unix_socket方式进行数据传输,默认关闭 + proc_path: /mnt/host/ # proc 文件路径,在host侧,为 / 在容器侧,如配置 -v /:/mnt/host 则配置为 /mnt/host + +outline: # 外部数据入口,适合接入外部数据场景 + - /tmp/sysom # 外部unix socket 路径,可以指定多个 + +luaPlugins: ["proc_buddyinfo", "proc_diskstats", "proc_meminfo", "proc_mounts", "proc_netdev", + "proc_snmp_stat", "proc_sockstat", "proc_stat", "proc_statm", "proc_vmstat"] # 控制lua 插件加载 + + +plugins: # 插件列表 对应 /collector/plugin 路径下编译出来的c库文件。 + - so: kmsg # 库名 + description: "collect dmesg info." # 描述符 + …… + +metrics: # export 导出的 metrics 列表 + - + title: sysak_proc_cpu_total # 显示的表名 + from: cpu_total # 数据源头,对应collector生成的数据表 + head: mode # 字段名,在prometheus 中以label 方式呈现 + help: "cpu usage info for total." # help 说明 + type: "gauge" # 数据类型 + …… +``` + +## 2.4 启动监控 + +进入 sysak/out/.sysak_components/tools/dist/app/beeQ 目录下, 执行run.sh 脚本,启动监控 +执行 curl 即可以查询到实时数据 + +``` +curl 127.0.0.1:8400/metrics +``` + +# 3、监控开发 + +## 3.1、监控指标采集 by lua + +本节将描述讲解如何基于lua 开发proc 数据采集。 + +### 3.1.1、纯pystring 处理方法 + +预备知识,lua + +* [pystring](https://gitee.com/chuyansz/sysak/blob/opensource_branch/source/tools/monitor/unity/beaver/guide/pystring.md) 库,处理字符串 +* [面向对象设计](https://gitee.com/chuyansz/sysak/blob/opensource_branch/source/tools/monitor/unity/beaver/guide/oop.md) + +以提取 /proc/net/sockstat 数据为例,原始的信息如下: + +``` +#cat /proc/net/sockstat +sockets: used 83 +TCP: inuse 6 orphan 0 tw 0 alloc 33 mem 2 +UDP: inuse 6 mem 12 +UDPLITE: inuse 0 +RAW: inuse 0 +FRAG: inuse 0 memory 0 +``` + +#### 3.1.1.1、数据处理策略 +sockstat 接口导出的数据非常有规律,基本上是 + +``` +[大标题]: [小标题] [值] …… +[大标题]: [小标题] [值] …… +``` + +这种方法进行组合,可以针对以上方式进行处理。 + +#### 3.1.1.2、数据格式 + +监控使用 [protobuf](https://www.jianshu.com/p/a24c88c0526a) 来序列化和存取数据,标准数据.proto 文件描述如下: + +``` + message labels { + required string name = 1; + required string index = 2; + } + message values { + required string name = 1; + required double value = 2; + } + message logs { + required string name = 1; + required string log = 2; + } + message dataLine{ + required string line = 1; + repeated labels ls = 2; + repeated values vs = 3; + repeated logs log = 4; + } + message dataLines{ + repeated dataLine lines = 1; + } + } +``` + +想了解监控 对 protobuf的处理,可以参考 [这个通用库](https://gitee.com/chuyansz/sysak/blob/opensource_branch/source/tools/monitor/unity/common/protoData.lua) + +#### 3.1.1.3、 vproc 虚基础类 +vproc 是所有 proc 接口数据采集的基础类,提供了通用的数据封装函数。根据前面的proto 文件描述,存储数据实质就是一堆数据表行组成的,在[vproc](https://gitee.com/chuyansz/sysak/blob/opensource_branch/source/tools/monitor/unity/collector/vproc.lua) 声明如下: + +``` +function CvProc:_packProto(head, labels, vs, log) + return {line = head, ls = labels, vs = vs, log = log} +end +``` + +添加数据行: + +``` +function CvProc:appendLine(line) + table.insert(self._lines["lines"], line) +end +``` + +将生成好的数据往外部table 中推送并清空本地数据: + +``` +function CvProc:push(lines) + for _, v in ipairs(self._lines["lines"]) do + table.insert(lines["lines"], v) + end + self._lines = nil + return lines +end +``` + +#### 3.1.1.4、整体代码实现 +了解了vproc 类后,就可以从vproc 实现一个 /proc/net/sockstat 数据采集接口。代码 实现和注释如下: + +``` +require("class") -- 面向对象 class 声明 +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") + +local CprocSockStat = class("procsockstat", CvProc) -- 从vproc 继承 + +function CprocSockStat:_init_(proto, pffi, pFile) -- 调用构造函数 + CvProc._init_(self, proto, pffi, pFile or "/proc/net/sockstat") +end + +function CprocSockStat:proc(elapsed, lines) -- 在主循环中会周期性调用proc 函数进行收集数据 + CvProc.proc(self) -- 新建本地表 + local vs = {} -- 用于暂存有效数据 + for line in io.lines(self.pFile) do -- 
读取文件内容 + local cells = pystring:split(line, ":", 1) -- 按: 分割标题和内容 + if #cells > 1 then -- 防止 空行产生无效数据 + local head, body = cells[1], cells[2] + head = string.lower(head) -- 标题统一小写 + body = pystring:lstrip(body, " ") -- 去除开头的空格 + local bodies = pystring:split(body, " ") -- 按空格分割内容 + local len = #bodies / 2 + for i = 1, len do + local title = string.format("%s_%s", head, bodies[2 * i - 1]) -- 组合数值标题 + local v = { + name=title, + value=tonumber(bodies[2 * i]) + } + table.insert(vs, v) -- 添加到暂存表中 + end + end + end + self:appendLine(self:_packProto("sock_stat", nil, vs)) -- 保存到本地表中 + return self:push(lines) --推送到全局表,并发送出去 +end + +return CprocSockStat -- 这一行不能少 +``` + +#### 3.1.1.5、注册到主循环中 + +[loop.lua](https://gitee.com/chuyansz/sysak/blob/opensource_branch/source/tools/monitor/unity/collector/loop.lua) 是周期性采样所有数据的循环实现。首先将文件引入: + +``` +local CprocSockStat = require("collector.proc_sockstat") +``` + +然后添加到collector 表中 + +``` +CprocSockStat.new(self._proto, procffi), +``` + +此时数据已经保存在本地 + +#### 3.1.1.6、导出到export + +要将采集到的指标采集到export,只需要在 [plugin.yaml](https://gitee.com/chuyansz/sysak/blob/opensource_branch/source/tools/monitor/unity/collector/plugin.yaml) 中添加以下行做配置即可: + +``` + - title: sysak_sock_stat + from: sock_stat # 代码中声明的表行 + head: value + help: "sock stat counters from /proc/net/sockstat" + type: "gauge" +``` + +#### 3.1.1.7、 数据呈现 +用浏览器打开本地8400端口,到指标链接中,就可以提取到以下新增数据 + +``` +# HELP sysak_sock_stat sock stat counters. +# TYPE sysak_sock_stat gauge +sysak_sock_stat{value="frag_inuse",instance="12345abdc"} 0.0 +sysak_sock_stat{value="udplite_inuse",instance="12345abdc"} 0.0 +sysak_sock_stat{value="udp_mem",instance="12345abdc"} 8.0 +sysak_sock_stat{value="tcp_mem",instance="12345abdc"} 1.0 +sysak_sock_stat{value="tcp_alloc",instance="12345abdc"} 32.0 +sysak_sock_stat{value="frag_memory",instance="12345abdc"} 0.0 +sysak_sock_stat{value="sockets_used",instance="12345abdc"} 80.0 +sysak_sock_stat{value="raw_inuse",instance="12345abdc"} 0.0 +sysak_sock_stat{value="tcp_tw",instance="12345abdc"} 0.0 +sysak_sock_stat{value="tcp_orphan",instance="12345abdc"} 0.0 +sysak_sock_stat{value="tcp_inuse",instance="12345abdc"} 5.0 +``` + +### 3.1.2、FFI 处理方式 +关于lua ffi 说明,可以先参考[lua扩展ffi](https://luajit.org/ext_ffi.html),本质是lua 可以通过ffi 接口直接调用C库参数,无需经过中间栈上传参等操作。 + +ffi的注意点: + +* ffi 数组下标是从0开始,和lua下标从1开始不一样; +* 可以直接引用ffi 中的数据结构,效率要比原生lua 高很多; +* ffi 是luajit 的功能,原生lua 并不支持; + +#### 3.1.2.1、 为什么要使用ffi? 
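+
+在展开原因之前,先看一个脱离监控框架、可独立运行的 luajit ffi 最小示意,便于理解后文的 var\_* 系列接口。注意这只是一个示意:`var_kvs_t` 结构体与 `var_input_kvs` 接口的正式说明见下一节,`ffi.load("procffi")` 中的库名为假设值,实际加载方式以工程内 procffi/cffi 的封装为准:
+
+```
+-- 最小示意:luajit ffi 直接调用 C 解析接口,结构体声明摘自下一节
+local ffi = require("ffi")
+
+ffi.cdef[[
+typedef struct var_kvs {
+    int no;                 // 收集到参数数量
+    char s[32];             // 标题
+    long long value[64];    // 参数列表,VAR_INDEX_MAX = 64
+} var_kvs_t;
+int var_input_kvs(const char *line, struct var_kvs *p);
+]]
+
+local cffi = ffi.load("procffi")    -- 库名仅为示意,以实际编译产物为准
+
+local data = ffi.new("var_kvs_t")
+assert(cffi.var_input_kvs("MemTotal:        2008012 kB", data) == 0)
+print(ffi.string(data.s), tonumber(data.value[0]))   -- 预期打印标题与首个数值
+```
+
+可以看到,一条 cdef 声明加一次 C 调用即可完成整行解析,无需在 lua 层逐段切分字符串;下文解释为什么这比纯字符串处理更高效。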
+pystring 虽然可以高效处理字符串数据,但是相比c语言中的scanf 接口来说效率还是要低很多。因此按行读取proc 数据,可以采用 ffi 接口来显著提升数据处理效率 + +#### 3.1.2.2、 ffi 数据结构和api 说明 + +proc 数据以变参为主,下面的结构体主要用于scanf 获取变参, 用于上层数据处理 + +``` +#define VAR_INDEX_MAX 64 + +// 变参整数类型,用于收集纯整数类型的数据 +typedef struct var_long { + int no; // 收集到参数数量 + long long value[VAR_INDEX_MAX]; //参数列表 +}var_long_t; + +// 变参字符串类型 +typedef struct var_string { + int no; // 收集到参数数量 + char s[VAR_INDEX_MAX][32]; //参数列表 +}var_string_t; + +// 变参 k vs 类型 +typedef struct var_kvs { + int no; // 收集到参数数量 + char s[32]; // 标题 + long long value[VAR_INDEX_MAX]; // 参数列表 +}var_kvs_t; +``` + +导出的c api + +``` +int var_input_long(const char * line, struct var_long *p); +int var_input_string(const char * line, struct var_string *p); +int var_input_kvs(const char * line, struct var_kvs *p); +``` + +综合来说: + +* var\_long\_t 适合纯整数数字输出的场景 +* var\_string\_t 适合纯字符串输出的场景 +* var\_kvs\_t 适合单字符串 + 多整形数字 组合的场景,如 /proc/stat的内容输出 + +其它重复组合场景可以先按照 var\_string\_t 来收集,然后对指定位置的数字字符串通过tonumber 进行转换。 + +#### 3.1.2.3 实际应用例子 +以[kvProc.lua](https://gitee.com/chuyansz/sysak/blob/opensource_branch/source/tools/monitor/unity/collector/kvProc.lua) 为例,它实现了一个通用kv组合的proc接口数据的数据高效的处理方法。如经常使用到的 /proc/meminfo ,是典型的kv值例子 + +``` +#cat /proc/meminfo +MemTotal: 2008012 kB +MemFree: 104004 kB +MemAvailable: 1060412 kB +Buffers: 167316 kB +Cached: 877672 kB +SwapCached: 0 kB +Active: 1217032 kB +Inactive: 522236 kB +Active(anon): 694948 kB +Inactive(anon): 236 kB +Active(file): 522084 kB +Inactive(file): 522000 kB +…… +``` +对应处理代码说明,重点需要关注**readKV**函数实现。 + +``` +local system = require("common.system") +require("common.class") +local CvProc = require("collecotor.vproc") + +local CkvProc = class("kvProc", CvProc) + +function CkvProc:_init_(proto, pffi, mnt, pFile, tName) + CvProc._init_(self, proto, pffi, pFile) -- 从基础类继承 + self._protoTable = { + line = tName, -- 表名 如/proc/meminfo 可以取 meminfo 为表名 + ls = nil, + vs = {} + } +end + +function CkvProc:checkTitle(title) -- 去除label中的保留字符,防止数据保存失败 + local res = string.gsub(title, ":", "") --去除 :和) + res = string.gsub(res, "%)", "") + res = string.gsub(res, "%(", "_") --(替换为_ + return res +end + +function CkvProc:readKV(line) -- 处理单行数据 + local data = self._ffi.new("var_kvs_t") -- 新增一个 var_kvs_t 结构体 + assert(self._cffi.var_input_kvs(self._ffi.string(line), data) == 0) --调用c api 进行读取 + assert(data.no >= 1) --确保访问成功 + + local name = self._ffi.string(data.s) -- 标题处理 + name = self:checkTitle(name) + local value = tonumber(data.value[0]) + + local cell = {name=name, value=value} -- 生存一段数据 + table.insert(self._protoTable["vs"], cell) -- 将数据存入表中 +end + +function CkvProc:proc(elapsed, lines) --处理数据 + self._protoTable.vs = {} + CvProc.proc(self) + for line in io.lines(self.pFile) do --遍历行 + self:readKV(line) -- 处理数据 + end + self:appendLine(self._protoTable) -- 添加到大表中 + return self:push(lines) --往外推送 +end + +return CkvProc +``` + +## 3.2、C 插件开发 + +在collector/plugin/sample 目录下有一个示例工程,它的本质其实就是一个so文件的编译项目。首先要看下sample 同级目录下的公共头文件 plugin_head.h,该头文件提供了数据生成的API,降低开发者实现难度。 + +``` +/// \brief 申请数据行数量,在填入数据前统一申请,根据实际情况填入 + /// \param lines 数据结构体 + /// \param num 申请行号数量 + /// \return 成功返回 0 + inline int unity_alloc_lines(struct unity_lines * lines, unsigned int num) __attribute__((always_inline)); + /// \brief 获取对应行数据,用于填入数据 + /// \param lines 数据结构体 + /// \param i 对应行下标 + /// \return 返回对应的数据行 + inline struct unity_line * unity_get_line(struct unity_lines * lines, unsigned int i) __attribute__((always_inline)); + /// \brief 设置数据行 表名 + /// \param line 行指针 + /// \param table 表名 + /// \return 成功返回 0 + inline int 
unity_set_table(struct unity_line * line, const char * table) __attribute__((always_inline)); + /// \brief 设置数据行 索引信息 + /// \param line 行指针 + /// \param i 索引下标 + /// \param name 索引名 + /// \param index 索引内容 + /// \return 成功返回 0 + inline int unity_set_index(struct unity_line * line, unsigned int i, const char * name, const char * index) __attribute__((always_inline)); + /// \brief 设置数据行 指标信息 + /// \param line 行指针 + /// \param i 指标下标 + /// \param name 指标名 + /// \param value 指标内容 + /// \return 成功返回 0 + inline int unity_set_value(struct unity_line * line, unsigned int i, const char * name, double value) __attribute__((always_inline)); + /// \brief 设置数据行 日志信息 + /// \param line 行指针 + /// \param name 日志名 + /// \param value 日志内容 + /// \return 成功返回 0 + inline int unity_set_log(struct unity_line * line, const char * name, const char * log) __attribute__((always_inline)); + /// \brief 设置数据行 日志信息 + /// \return 返回mount 目录 + char* get_unity_proc(void); +``` + +**数据规格限制** + +1. unity\_set\_table 中 table 参数长度应该小于32(不含) +2. unity\_set\_index 中 name、index和unity\_set\_value 中 name 参数长度应该要小于16(不含) +3. unity\_set\_index 下标从0开始,并小于 4,即最多4个索引。而且下标数值应该连续,否则数据会从留白处截断 +4. unity\_set\_index 下标从0开始,并小于 32,即最多32个数值。而且下标数值应该连续,否则数据会从留白处截断; +5. unity\_set\_log 中的log 指针需要开发者进行释放; +6. get\_unity\_proc参考2.3节中 proc_path 中的内容; + +### 3.2.1、sample 用例代码 + +适合周期性数据采集的场景,通过周期性调用call 函数来收集数据 + +参考 sample.c + +``` + + /// \brief 插件构造函数,在加载so的时候,会调用一次init + /// \param arg 当前未使用,为NULL + /// \return 成功返回 0 + int init(void * arg) { + printf("sample plugin install.\n"); + return 0; + } + + /// \brief 插件调用函数,通过调用在函数来收集要采集的指标 + /// \param t,间隔周期,如15s的采样周期,则该值为15 + /// \param lines 数值指针,用于填充采集到的数据。 + /// \return 成功返回 0 + int call(int t, struct unity_lines* lines) { + static double value = 0.0; + struct unity_line* line; + + unity_alloc_lines(lines, 2); + line = unity_get_line(lines, 0); + unity_set_table(line, "sample_tbl1"); + unity_set_index(line, 0, "mode", "sample1"); + unity_set_value(line, 0, "value1", 1.0 + value); + unity_set_value(line, 1, "value2", 2.0 + value); + + line = unity_get_line(lines, 1); + unity_set_table(line, "sample_tbl2"); + unity_set_value(line, 0, "value1", 3.0 + value); + unity_set_value(line, 1, "value2", 4.0 + value); + unity_set_value(line, 2, "value3", 3.1 + value); + unity_set_value(line, 3, "value4", 4.1 + value); + + value += 0.1; + return 0; + } + + /// \brief 插件析构函数,调用完该函数时,必须要确保该插件已申请的资源已经全部释放完毕。 + /// \return 成功返回 0 + void deinit(void) { + printf("sample plugin uninstall\n"); + } +``` + +### 3.2.3、threads 代码 + +sample 适合常规数据采集,周期性遍历插件拉取指标的场景。但在实际实践中,还存在数据主动推送的场景。如下图紫线路径所示: + +![dataflow](image/queue.png) + +这种场景下,可以通过创建thread 方式进行进行数据推送,相关参考代码在 collector/plugin/thread 目录 + +``` +#include "sample_threads.h" +#include +#include + +static volatile pthread_t sample_thread_id = 0; //进程id,停止的时候使用 + +static int sample_thread_func(struct beeQ* q, void * arg); //线程回调函数声明,可以通过arg 向 线程回调函数传参 +int init(void * arg) { + struct beeQ* q = (struct beeQ *)arg; + sample_thread_id = beeQ_send_thread(q, NULL, sample_thread_func); // 创建线程 + printf("start sample_thread_id: %lu\n", sample_thread_id); + return 0; +} + +static int sample_thread_func(struct beeQ* q, void * arg) { + unsigned int ret; + while (plugin_is_working()) { + static double value = 1.0; + struct unity_line* line; + struct unity_lines * lines = unity_new_lines(); + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + unity_set_table(line, "sample_tbl3"); + unity_set_value(line, 0, "value1", 1.0 + value); + unity_set_value(line, 1, "value2", 
2.0 + value); + unity_set_log(line, "log", "hello world."); + beeQ_send(q, lines); // 往队列里面推送数据 + ret = sleep(5); + if (ret > 0) { // interrupt by signal + break; + } + } + return 0; +} + +int call(int t, struct unity_lines* lines) { + static double value = 0.0; + struct unity_line* line; + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + unity_set_table(line, "sample_tbl1"); + unity_set_index(line, 0, "mode", "threads"); + unity_set_value(line, 0, "value1", 1.0 + value); + unity_set_value(line, 1, "value2", 2.0 + value); + + value += 0.1; + return 0; +} + +void deinit(void) { + plugin_thread_stop(sample_thread_id); + printf("thread plugin uninstall\n"); +} + +``` + +**在线程回调函数中,必须要判断所有调用到的函数是否被信号打断,用于决定是否需要退出并释放相应资源。** + +如实例代码中需要获取sleep 函数的返回值,根据[sleep函数](https://man7.org/linux/man-pages/man3/sleep.3.html)的返回值说明: + +``` +Zero if the requested time has elapsed, or the number of seconds + left to sleep, if the call was interrupted by a signal handler. +``` + +需要判断是否存在sleep 函数被打断的场景。 + +## 3.3、coolbpf 插件开发 + +关于coolbpf,可以参考[这里](https://gitee.com/anolis/coolbpf) + +`/collector/plugin/bpfsample2` 路径提供了一个基于 eBPF 的监控开发样例。其主要包含三个部分: + +1. Makefile: 用于编译该工具; +2. bpfsample2.bpf.c: 此处编写 eBPF 程序 +3. bpfsmaple2.c: 此处编写用户态程序 + +接下分别介绍这三个部分。 + +### 3.3.1、Makfile + +```Makefile +newdirs := $(shell find ./ -type d) + +bpfsrcs := bpfsample2.bpf.c +csrcs := bpfsample2.c +so := libbpfsample2.so + +include ../bpfso.mk +``` + +1. `bpfsrcs`: 用来指定需要编译的 eBPF 程序源文件 +2. `csrcs`: 用来指定需要编译的用户态程序源文件 +3. `so`: 用来指定生成目标动态库名称 + +开发者只需要关注上述三个变量的修改即可。 + + +### 3.3.2、bpfsample2.bpf.c: eBPF 程序的编写 + +```c +#include +#include +#include "bpfsample2.h" + +BPF_PERF_OUTPUT(perf, 1024); + +SEC("kprobe/netstat_seq_show") +int BPF_KPROBE(netstat_seq_show, struct sock *sk, struct msghdr *msg, size_t size) +{ + struct event e = {}; + + e.ns = ns(); + e.cpu = cpu(); + e.pid = pid(); + comm(e.comm); + + bpf_perf_event_output(ctx, &perf, BPF_F_CURRENT_CPU, &e, sizeof(struct event)); + return 0; +} + +``` + +1. `vmlinux.h` 和 `coolbpf.h` 是coolbpf框架提供的两个头文件,里面包含了类似 `BPF_PERF_OUTPUT` 的helper函数,以及内核结构体的定义 +2. `bpfsample2.h` 是开发者自定义的头文件 + + +### 3.3.3、bpfsample2.c: 用户态程序的编写 + +unity 监控框架提供了三个函数,分别是: + +```c +int init(void *arg) +{ + return 0; +} + +int call(int t, struct unity_lines *lines) +{ + return 0; +} + +void deinit(void) +{ +} +``` + +在 `init` 函数里,需要去 load, attach eBPF程序,如有需要可能还会创建用于接收perf事件的线程。为了开发方便,coolbpf提供了简单的宏定义去完成这一系列的操作,即 `LOAD_SKEL_OBJECT(skel_name, perf);` 。因此,一般 `init` 函数具体形式如下: + +```c +int init(void *arg) +{ + return LOAD_SKEL_OBJECT(bpf_sample2, perf);; +} +``` + +对于 `call` 函数,我们保持不变,即直接 `return 0`。 + +对于 `deinit` 函数,同 `init` 函数里提供的 `LOAD_SKEL_OBJECT` 宏定义一样,我们也提供了类似的销毁宏定义,即:`DESTORY_SKEL_BOJECT`。 因此,一般 `deinit` 函数具体形式如下: + +```c +int deinit(void *arg) +{ + return DESTORY_SKEL_BOJECT(bpf_sample2); +} +``` diff --git a/source/tools/monitor/unity/beaver/guide/guide.md b/source/tools/monitor/unity/beaver/guide/guide.md index 0ac809e7a0ffbd462335bac5b3112c8bed718682..f2d83799bae2068e9409cb238d571e39b0fae134 100644 --- a/source/tools/monitor/unity/beaver/guide/guide.md +++ b/source/tools/monitor/unity/beaver/guide/guide.md @@ -1,8 +1,7 @@ # 目录 - -1. [插件化与热更新](/guide/hotplugin) -2. [面向对象设计](/guide/oop) -3. [字符串处理](/guide/pystring) -4. [页面开发](/guide/webdevel) -5. [proc和probe记录表](/guide/proc_probe) -6. [采集proc 接口指标](/guide/dev_proc) \ No newline at end of file +1. [开发手册](/guide/develop.md) +2. [proc和probe记录表](/guide/proc_probe.md) +3. [metric 指标数据说明](/guide/metrics.md) +4. 
[在lua 中使用pystring](/guide/pystring.md) +5. [bpf\ map 开发](/guide/bpf.md) +6. [bpf perf 开发](/guide/bpf_perf.md) \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/hotplugin.md b/source/tools/monitor/unity/beaver/guide/hotplugin.md index 5eb67541b94d1cc9e7bed100142e18127f55adb6..5754909c921f73ce0f972a97a86d3a3c35ed42e7 100644 --- a/source/tools/monitor/unity/beaver/guide/hotplugin.md +++ b/source/tools/monitor/unity/beaver/guide/hotplugin.md @@ -111,4 +111,4 @@ unity监控采用[yaml](http://yaml.org/)对插件进行管理,当前插件分 此时数据只是已经更新入库了,但是要在nodexport上面显示,需要配置beaver/export.yaml 文件,才能将查询从数据表中更新。 -[返回目录](/guide) \ No newline at end of file +[返回目录](/guide/guide.md) \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/image/frame.png b/source/tools/monitor/unity/beaver/guide/image/frame.png new file mode 100644 index 0000000000000000000000000000000000000000..3892a4cb3f167ffb3c1a32be0ad7e1003e66f262 Binary files /dev/null and b/source/tools/monitor/unity/beaver/guide/image/frame.png differ diff --git a/source/tools/monitor/unity/beaver/guide/image/python.png b/source/tools/monitor/unity/beaver/guide/image/python.png new file mode 100644 index 0000000000000000000000000000000000000000..fb588d4672af570eef744f1e4d39ea3f866e1a4b Binary files /dev/null and b/source/tools/monitor/unity/beaver/guide/image/python.png differ diff --git a/source/tools/monitor/unity/beaver/guide/image/queue.png b/source/tools/monitor/unity/beaver/guide/image/queue.png new file mode 100644 index 0000000000000000000000000000000000000000..01fc076bf35716308fccc70cc0c37e9718810a9c Binary files /dev/null and b/source/tools/monitor/unity/beaver/guide/image/queue.png differ diff --git a/source/tools/monitor/unity/beaver/guide/metrics.md b/source/tools/monitor/unity/beaver/guide/metrics.md new file mode 100644 index 0000000000000000000000000000000000000000..5b276235baeefa31d16a8124b79de0c19b292e6f --- /dev/null +++ b/source/tools/monitor/unity/beaver/guide/metrics.md @@ -0,0 +1,175 @@ +# 指标说明 + +这里记录所有采集到的监控指标说明和来源,方便监控系统集成。 + +## 通用指标 + +------------- + +### uptime 表 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | ---: | :---- | :---- | :--- | +| uptime | 秒 | 从系统启动到现在的时间 | | collector/proc\_uptime.lua | +| idletime | 秒 | 系统总空闲的时间 | | collector/proc\_uptime.lua | +| stamp | 秒 | 系统时间戳 | unix 时间 | collector/proc\_uptime.lua | + +### uname 表 + +每小时获取一次 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +|:---------| ---: | :---- | :---- | :--- | +| nodename | - | uname -r | | collector/proc\_uptime.lua | +| version | - | uname -r | | collector/proc\_uptime.lua | +| release | - | uname -r | | collector/proc\_uptime.lua | +| machine | - | uname -r | | collector/proc\_uptime.lua | +| sysname | - | uname -r | | collector/proc\_uptime.lua | + +## 网络指标 + +----------- + +### arp + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | ---: | :---- | :---- | :--- | +| count | 个 | 网卡名 | 网卡上对应arp表数量 | collector/proc\_arp.lua | + +### networks + +这是网卡流量统计信息,已做差值处理 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | ---: | :---- | :---- | :--- | +| if\_ocompressed | 个 | network\_name 网卡名 | 发送时,设备驱动程序发送或接收的压缩数据包数 | collector/proc\_netdev.lua | +| if\_ocarrier | 个 | network\_name 网卡名 | 发送时,由于carrier错误而丢弃的数据包数 | collector/proc\_netdev.lua | +| if\_ocolls | 个 | network\_name 网卡名 | 发送时,冲突信息包的数目 | collector/proc\_netdev.lua | +| if\_ofifo | 个 | network\_name 网卡名 | 发送时,FIFO缓冲区错误的数量 | collector/proc\_netdev.lua | +| if\_obytes | Byte | network\_name 网卡名 | 发送时,数据的总字节数 | collector/proc\_netdev.lua | +| if\_odrop | 个 | network\_name 网卡名 | 发送时,设备驱动程序丢弃的数据包总数 
| collector/proc\_netdev.lua | +| if\_oerrs | 个 | network\_name 网卡名 | 发送时,错误的总数 | collector/proc\_netdev.lua | +| if\_opackets | 个 | network\_name 网卡名 | 发送时,数据包总数 | collector/proc\_netdev.lua | +| if\_icompressed | 个 | network\_name 网卡名 | 接收时,设备驱动程序发送或接收的压缩数据包数 | collector/proc\_netdev.lua | +| if\_ierrs | 个 | network\_name 网卡名 | 接收时,错误的总数 | collector/proc\_netdev.lua | +| if\_ififo | 个 | network\_name 网卡名 | 接收时,FIFO缓冲区错误的数量 | collector/proc\_netdev.lua | +| if\_iframe | 个 | network\_name 网卡名 | 接收时,分组帧错误的数量 | collector/proc\_netdev.lua | +| if\_ipackets | 个 | network\_name 网卡名 | 接收时,数据包总数 | collector/proc\_netdev.lua | +| if\_idrop | 个 | network\_name 网卡名 | 接收时,设备驱动程序丢弃的数据包总数 | collector/proc\_netdev.lua | +| if\_imulticast | 个 | network\_name 网卡名 | 接收时,多播帧数 | collector/proc\_netdev.lua | +| if\_ibytes | 个 | network\_name 网卡名 | 接收时,数据字节总数 | collector/proc\_netdev.lua | + +### pkt_status + +这里统计所有包状态,详细可以通过 pkt_logs 获取 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | ---: | :---- | :---- | :--- | +| abort | 次 | | 协议栈断言失效次数 | collector/proc\_snmp\_stat.lua | +| overflow | 次 | | 协议栈溢出次数 | collector/proc\_snmp\_stat.lua | +| err | 次 | | 协议栈错误次数 | collector/proc\_snmp\_stat.lua | +| paws | 次 | | 协议栈PAWS回绕次数 | collector/proc\_snmp\_stat.lua | +| fail | 次 | | 协议栈failure次数 | collector/proc\_snmp\_stat.lua | +| retrans | 次 | | 协议栈溢出次数 | collector/proc\_snmp\_stat.lua | +| drop | 次 | | 协议栈丢包次数 | collector/proc\_snmp\_stat.lua | + +### sock_stat + +统计所有包状态。[参考连接](https://developer.aliyun.com/article/484451) + + 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | ---: | :---- | :---- | :--- | +| frag\_inuse | 个 | | 使用的IP段数量 | collector/proc\_sockstat.lua | +| frag\_memory | 页 | | IP段使用内存数量 | collector/proc\_sockstat.lua | +| udplite\_inuse | 个 | | udplite 使用量 | collector/proc\_sockstat.lua | +| udp\_mem | 页 | | udp socket 内存使用量,含收发缓冲区队列 | collector/proc\_sockstat.lua | +| udp\_inuse | 个 | | udp 使用量 | collector/proc\_sockstat.lua | +| tcp\_mem | 页 | | udp socket 内存使用量,含收发缓冲区队列 | collector/proc\_sockstat.lua | +| tcp\_alloc | 个 | | TCP socket 申请总数 | collector/proc\_sockstat.lua | +| tcp\_tw | 个 | | TCP time wait socket 总数 | collector/proc\_sockstat.lua | +| tcp\_orphan | 个 | | TCP ophan socket 总数 | collector/proc\_sockstat.lua | +| tcp\_inuse | 个 | | TCP 常规 socket 总数 | collector/proc\_sockstat.lua | +| raw\_inuse | 个 | | raw socket 使用量 | collector/proc\_sockstat.lua | +| sockets\_used | 个 | | 总socket 使用量 | collector/proc\_sockstat.lua | + + +### softnets + +This parser parses the stats from network devices. These stats includes events per cpu\(in row\), number of packets processed i.e packet_process \(first column\), number of packet drops packet\_drops \(second column\), time squeeze eg net\_rx\_action performed time_squeeze\(third column\), cpu collision eg collision occur while obtaining device lock while transmitting cpu\_collision packets \(eighth column\), received_rps number of times cpu woken up received\_rps \(ninth column\), number of times reached flow limit count flow\_limit\_count \(tenth column\), backlog status \(eleventh column\), core id \(twelfth column\). + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | ---: | :---- | :---- | :--- | +| packet\_process | 个 | cpu,对应CPU号 | 所在核收包个数 | collector/proc\_softnet\_stat.lua | +| packet\_drop | 个 | cpu,对应CPU号 | 所在核丢包个数 | collector/proc\_softnet\_stat.lua | +| cpu\_collision | 个 | cpu,对应CPU号 | collision occur while obtaining device lock while transmitting. 
| collector/proc\_softnet\_stat.lua | +| received\_rps | 个 | cpu,对应CPU号 | number of times cpu woken up received_rps. | collector/proc\_softnet\_stat.lua | +| time\_squeeze | 个 | cpu,对应CPU号 | net\_rx\_action. | collector/proc\_softnet\_stat.lua | +| flow\_limit\_count | 个 | cpu,对应CPU号 | number of times reached flow limit count. | collector/proc\_softnet\_stat.lua | + +### cgroups 表 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | --- | :---- | :---- | :--- | +| type | - | subsys类型 | | collector/proc\_cgroups.lua | +| blkio | 个 | blkio cgroup 数量 | | collector/proc\_cgroups.lua | +| freezer | 个 | freezer cgroup数量 | | collector/proc\_cgroups.lua | +| devices | 个 | devices cgroup数量 | | collector/proc\_cgroups.lua | +| hugetlb | 个 | hugetlb cgroup数量 | | collector/proc\_cgroups.lua | +| pids | 个 | blkio cgroup 数量 | | collector/proc\_cgroups.lua | +| rdma | 个 | rdma cgroup数量 | | collector/proc\_cgroups.lua | +| net\_prio | 个 | net_prio cgroup数量 | | collector/proc\_cgroups.lua | +| net\_cls | 个 | net_cls cgroup数量 | | collector/proc\_cgroups.lua | +| cpu | 个 | cpu cgroup 数量 | | collector/proc\_cgroups.lua | +| cpuacct | 个 | cpuacct cgroup数量 | | collector/proc\_cgroups.lua | +| perf\_event | 个 | perf_event cgroup数量 | | collector/proc\_cgroups.lua | +| memory | 个 | memory cgroup数量 | | collector/proc\_cgroups.lua | + +### interrupts 表 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | --- | :---- | :---- | :--- | +| cpu | - | CPU ID | | collector/proc\_interrupts.lua | +| 中断名称 | 次 | 中断触发次数 | | collector/proc\_interrupts.lua | + +### mounts 表 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | --- | :---- | :---- | :--- | +| fs | - | sysfs | | collector/proc\_mounts.lua | +| mount | - | 挂载目录 | | collector/proc\_mounts.lua | +| f\_bsize | - | Filesystem block size | | collector/proc\_mounts.lua | +| f\_blocks | - | Size of fs in f_frsize units | | collector/proc\_mounts.lua | +| f\_bfree | - | Number of free blocks | | collector/proc\_mounts.lua | +| f\_bavail | - | Number of free blocks for unprivileged users | | collector/proc\_mounts.lua | +| f\_files | - | Number of inodes | | collector/proc\_mounts.lua | +| f\_ffree | - | Number of free inodes | | collector/proc\_mounts.lua | +| f\_favail | - | Number of free inodes for unprivileged users | | collector/proc\_mounts.lua | + +### softirqs 表 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | --- | :---- | :---- | :--- | +| cpu | - | CPU ID | | collector/proc\_softirqs.lua | +| HI | 次 | HI软中断触发次数 | | collector/proc\_softirqs.lua | +| TIMER | 次 | TIMER软中断触发次数 | | collector/proc\_softirqs.lua | +| NET\_TX | 次 | NET\_TX软中断触发次数 | | collector/proc\_softirqs.lua | +| NET\_RX | 次 | NET\_RX软中断触发次数 | | collector/proc\_softirqs.lua | +| BLOCK | 次 | BLOCK软中断触发次数 | | collector/proc\_softirqs.lua | +| IRQ_POLL | 次 | IRQ\_POLL软中断触发次数 | | collector/proc\_softirqs.lua | +| TASKLET | 次 | TASKLET软中断触发次数 | | collector/proc\_softirqs.lua | +| SCHED | 次 | SCHED软中断触发次数 | | collector/proc\_softirqs.lua | +| HRTIMER | 次 | HRTIMER软中断触发次数 | | collector/proc\_softirqs.lua | +| RCU | 次 | RCU软中断触发次数 | | collector/proc\_softirqs.lua | + +### self_statm 表 +统计监控进程的statm信息 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | --- | :---- | :---- | :--- | +| size | - | total program size | | collector/proc\_statm.lua | +| resident | - | resident set size | | collector/proc\_statm.lua | +| shared | - | number of resident shared pages | | collector/proc\_statm.lua | +| text | - | text (code) | | collector/proc\_statm.lua | +| lib | - | library | | collector/proc\_statm.lua | +| data | - | data + stack | | 
collector/proc\_statm.lua | +| dt | - | dirty pages | | collector/proc\_statm.lua | diff --git a/source/tools/monitor/unity/beaver/guide/oop.md b/source/tools/monitor/unity/beaver/guide/oop.md index 90c265bc6927e9b562df770a247f56fceb3498c3..87c4a6754e752649827246147bd32a627a11a880 100644 --- a/source/tools/monitor/unity/beaver/guide/oop.md +++ b/source/tools/monitor/unity/beaver/guide/oop.md @@ -113,4 +113,4 @@ Ctwo 继承于Cone,这里重新实现并复用了父类的say方法。 function Cone:say() function Cone.say(self) -[返回目录](/guide) +[返回目录](/guide/guide.md) diff --git a/source/tools/monitor/unity/beaver/guide/proc_probe.md b/source/tools/monitor/unity/beaver/guide/proc_probe.md index ac412ac6c83db8d42dad220b6049c8d67c0e860e..3283c9bc5e96e4a90a094158e29eb80b9f7bddea 100644 --- a/source/tools/monitor/unity/beaver/guide/proc_probe.md +++ b/source/tools/monitor/unity/beaver/guide/proc_probe.md @@ -22,4 +22,4 @@ libbpf kprobe/kretprobe/trace\_event/perf event 等事件记录在这里 | ----- | --------- | | xxx | xxx | -[返回目录](/guide) +[返回目录](/guide/guide.md) diff --git a/source/tools/monitor/unity/beaver/guide/pystring.md b/source/tools/monitor/unity/beaver/guide/pystring.md index 5e795f4bcb036c979bf099737770d1ce889ffa77..c783d0b2d50f219eef093c0e8c5ed329ce01683f 100644 --- a/source/tools/monitor/unity/beaver/guide/pystring.md +++ b/source/tools/monitor/unity/beaver/guide/pystring.md @@ -1,4 +1,5 @@ # 字符串处理 +![pystring](image/python.png) 同为脚本语言,lua 默认的字符串处理并不像python 那么完善。但只要通过拓展,也可以像python 一样对字符串进行处理。当前已经实现了split/strip 等高频使用函数。参考[Python字符串处理](https://www.jianshu.com/p/b758332c44bb) @@ -98,4 +99,4 @@ find 用于子串查找,成功返回首次开始的位置,如果不包含, assert(pystring:find("hello world.", "hello") == 1) ``` -[返回目录](/guide) \ No newline at end of file +[返回目录](/guide/guide.md) \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/webdevel.md b/source/tools/monitor/unity/beaver/guide/webdevel.md index 1fa5b829a1f845e248a2b7a65bbdbdcbc222c868..b23efd6d3c2e76031c8073e73a6215e04964ca25 100644 --- a/source/tools/monitor/unity/beaver/guide/webdevel.md +++ b/source/tools/monitor/unity/beaver/guide/webdevel.md @@ -58,11 +58,11 @@ end return CurlGuide ``` -这里采用了面向对象方法实现,关于面向对象,可以[参考这里](/guide/oop) +这里采用了面向对象方法实现,关于面向对象,可以[参考这里](/guide/oop.md) ## 热更新 * 如果仅修改了markdown文件,直接更新文件刷新页面即可; * 如果修改了lua文件,给主进程发送1号信号,进程会重新装载,新页面也会立即生效; -[返回目录](/guide) \ No newline at end of file +[返回目录](/guide/guide.md) \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/identity.lua b/source/tools/monitor/unity/beaver/identity.lua index 27582ac6379291c6ded83aea88a6f86f7ed603e5..220b60661e47af0f4c108f0294a8480876e5ac51 100644 --- a/source/tools/monitor/unity/beaver/identity.lua +++ b/source/tools/monitor/unity/beaver/identity.lua @@ -25,8 +25,8 @@ end local function getAdd(hostName) local _, resolved = socket.dns.toip(hostName) local listTab = {} - for _, v in pairs(resolved.ip) do - table.insert(listTab, v) + for i, v in pairs(resolved.ip) do + listTab[i] = v end return listTab end @@ -41,11 +41,11 @@ function Cidentity:hostip() end function Cidentity:curl() - if self._opts.curl then + if self._opts.url then local ChttpCli = require("httplib.httpCli") local cli = ChttpCli.new() - local res = cli:get(self._opts.curl) + local res = cli:get(self._opts.url) return res.body else return "None" @@ -57,15 +57,16 @@ function Cidentity:hostname() end function Cidentity:file() + local res = "None" if self._opts.path then local file = io.open(self._opts.path, "r") - io.input(file) - local res = io.read() - io.close(file) - return res - else - return "None" + if 
file then + io.input(file) + res = io.read() + io.close(file) + end end + return res end function Cidentity:specify() @@ -80,4 +81,4 @@ function Cidentity:id() return self._funcs[self._opts.mode]() end -return Cidentity \ No newline at end of file +return Cidentity diff --git a/source/tools/monitor/unity/beaver/index.lua b/source/tools/monitor/unity/beaver/index.lua index c3ce944ce09128543ae7765c8adc1984cbed0d5e..270ac24ffb367c88cd4335b0b1fdbe574f2f0bd9 100644 --- a/source/tools/monitor/unity/beaver/index.lua +++ b/source/tools/monitor/unity/beaver/index.lua @@ -52,7 +52,8 @@ function CurlIndex:show(tReq) ### Tips - This page is rendered directly via markdown, for [guide](/guide) + This page is rendered directly via markdown, for [guide](/guide/guide.md) + local data [query entry](/query/base) ]] local content2 = string.format("\n thread id is:%d\n", unistd.getpid()) local title = "welcome to visit SysAk Agent server." diff --git a/source/tools/monitor/unity/beaver/localBeaver.lua b/source/tools/monitor/unity/beaver/localBeaver.lua index 5671ce87c5b841982eee678acf7d0ed77f31fdf3..748a20e14b7d6db873968c0caf6bfde113b8cc0c 100644 --- a/source/tools/monitor/unity/beaver/localBeaver.lua +++ b/source/tools/monitor/unity/beaver/localBeaver.lua @@ -17,12 +17,17 @@ local function setupServer(fYaml) local port = config["port"] or 8400 local ip = config["bind_addr"] or "0.0.0.0" local backlog = config["backlog"] or 32 - return port, ip, backlog + local unix_socket = config["unix_socket"] + return port, ip, backlog,unix_socket end function CLocalBeaver:_init_(frame, fYaml) - local port, ip, backlog = setupServer(fYaml) - self._bfd = self:_install_fd(port, ip, backlog) + local port, ip, backlog, unix_socket = setupServer(fYaml) + if not unix_socket then + self._bfd = self:_install_fd(port, ip, backlog) + else + self._bfd = self:_install_fd_unisock(backlog, unix_socket) + end self._efd = self:_installFFI() self._cos = {} @@ -34,6 +39,13 @@ function CLocalBeaver:_init_(frame, fYaml) end function CLocalBeaver:_del_() + for fd in pairs(self._cos) do + socket.shutdown(fd, socket.SHUT_RDWR) + local res = self._cffi.del_fd(self._efd, fd) + print("close fd: " .. fd) + assert(res >= 0) + end + if self._efd then self._cffi.deinit(self._efd) end @@ -42,11 +54,6 @@ function CLocalBeaver:_del_() end end -local function posixError(msg, err, errno) - local s = msg .. string.format(": %s, errno: %d", err, errno) - error(s) -end - function CLocalBeaver:_installTmo(fd) self._tmos[fd] = os.time() end @@ -57,7 +64,7 @@ function CLocalBeaver:_checkTmo() -- ! coroutine will del self._tmos cell in loop, so create a mirror table for safety local tmos = system:dictCopy(self._tmos) for fd, t in pairs(tmos) do - if now - t >= 60 then + if now - t >= 10 * 60 then local e = self._ffi.new("native_event_t") e.ev_close = 1 e.fd = fd @@ -81,25 +88,76 @@ function CLocalBeaver:_installFFI() return efd end +local function localBind(fd, tPort) + local try = 0 + local res, err, errno + + -- can reuse for time wait socket. + res, err, errno = socket.setsockopt(fd, socket.SOL_SOCKET, socket.SO_REUSEADDR, 1); + if not res then + system:posixError("set sock opt failed."); + end + + while try < 120 do + res, err, errno = socket.bind(fd, tPort) + if res then + return 0 + elseif errno == 98 then -- port already in use? 
try 30s; + unistd.sleep(1) + try = try + 1 + else + break + end + end + system:posixError(string.format("bind port %d failed.", tPort.port), err, errno) +end + +function CLocalBeaver:_install_fd_unisock(backlog,unix_socket) + local fd, res, err, errno + unistd.unlink(unix_socket) + fd, err, errno = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0) + if fd then -- for socket + local tPort = {family=socket.AF_UNIX, path=unix_socket} + local r, msg = pcall(localBind, fd, tPort) + if r then + res, err, errno = socket.listen(fd, backlog) + if res then -- for listen + return fd + else + unistd.close(fd) + system:posixError("socket listen failed", err, errno) + end + else + print(msg) + unistd.close(fd) + os.exit(1) + end + else -- socket failed + system:posixError("create socket failed", err, errno) + end +end + function CLocalBeaver:_install_fd(port, ip, backlog) local fd, res, err, errno fd, err, errno = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) if fd then -- for socket - res, err, errno = socket.bind(fd, {family=socket.AF_INET, addr=ip, port=port}) - if res then -- for bind + local tPort = {family=socket.AF_INET, addr=ip, port=port} + local r, msg = pcall(localBind, fd, tPort) + if r then res, err, errno = socket.listen(fd, backlog) if res then -- for listen return fd else - posixError("socket listen failed", err, errno) + unistd.close(fd) + system:posixError("socket listen failed", err, errno) end - else -- for bind failed + else + print(msg) unistd.close(fd) - posixError("socket bind failed", err, errno) os.exit(1) end else -- socket failed - posixError("create socket failed", err, errno) + system:posixError("create socket failed", err, errno) end end @@ -120,7 +178,7 @@ function CLocalBeaver:read(fd, maxLen) return nil end else - posixError("socket recv error", err, errno) + system:posixError("socket recv error", err, errno) end else print(system:dump(e)) @@ -137,7 +195,6 @@ function CLocalBeaver:write(fd, stream) sent, err, errno = socket.send(fd, stream) if sent then if sent < #stream then -- send buffer may full - print("need to send buffer for " .. (#stream - sent)) res = self._cffi.mod_fd(self._efd, fd, 1) -- epoll write ev assert(res == 0) @@ -149,27 +206,33 @@ function CLocalBeaver:write(fd, stream) stream = string.sub(stream, sent + 1) sent, err, errno = socket.send(fd, stream) if sent == nil then - posixError("socket send error.", err, errno) + if errno == 11 then -- EAGAIN ? + goto continue + end + system:posixError("socket send error.", err, errno) return nil end else -- need to read ? may something error or closed. 
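+                -- unexpected poll event while flushing the send buffer; abandon this response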
return nil end + ::continue:: end res = self._cffi.mod_fd(self._efd, fd, 0) -- epoll read ev only assert(res == 0) end return 1 else - posixError("socket send error.", err, errno) + system:posixError("socket send error.", err, errno) return nil end end function CLocalBeaver:_proc(fd) local fread = self:read(fd) + local session = {} + local res, alive while true do - local res, alive = self._frame:proc(fread) + res, alive, session = self._frame:proc(fread, session) if res then local stat = self:write(fd, res) @@ -211,12 +274,12 @@ function CLocalBeaver:accept(fd, e) self:co_add(nfd) self:_installTmo(nfd) else - posixError("accept new socket failed", err, errno) + system:posixError("accept new socket failed", err, errno) end end end -function CLocalBeaver:_poll(bfd, nes) +function CLocalBeaver:_pollFd(bfd, nes) for i = 0, nes.num - 1 do local e = nes.evs[i]; local fd = e.fd @@ -233,10 +296,7 @@ function CLocalBeaver:_poll(bfd, nes) self:_checkTmo() end -function CLocalBeaver:poll() - assert(self._once, "poll loop only run once time.") - self._once = false - +function CLocalBeaver:_poll() local bfd = self._bfd local efd = self._efd while true do @@ -244,17 +304,21 @@ function CLocalBeaver:poll() local res = self._cffi.poll_fds(efd, 10, nes) if res < 0 then - break + return "end poll." end - self:_poll(bfd, nes) + self:_pollFd(bfd, nes) end +end + +function CLocalBeaver:poll() + assert(self._once, "poll loop only run once time.") + self._once = false + + local _, msg = pcall(self._poll, self) + print(msg) - for fd in pairs(self._cos) do - local res = self._cffi.del_fd(self._efd, fd) - assert(res >= 0) - end return 0 end -return CLocalBeaver \ No newline at end of file +return CLocalBeaver diff --git a/source/tools/monitor/unity/beaver/native/Makefile b/source/tools/monitor/unity/beaver/native/Makefile index 583c418ad408c5e7ed0d7ad46d6625b3132c74a8..48c42266ca239e4809518431915bc0fc23ec32d5 100644 --- a/source/tools/monitor/unity/beaver/native/Makefile +++ b/source/tools/monitor/unity/beaver/native/Makefile @@ -13,7 +13,7 @@ $(SO): $(OBJS) $(CC) -o $@ $(OBJS) $(LDFLAG) install: $(SO) - cp $(SO) ../../collector/native/ + cp $(SO) ../../beeQ/lib clean: rm -f $(SO) $(OBJS) \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/query/baseQuery.lua b/source/tools/monitor/unity/beaver/query/baseQuery.lua new file mode 100644 index 0000000000000000000000000000000000000000..109f67c1d07d5ca3758210ff27336d2495de4881 --- /dev/null +++ b/source/tools/monitor/unity/beaver/query/baseQuery.lua @@ -0,0 +1,246 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/2/27 11:41 PM +--- + +require("common.class") +local system = require("common.system") +local CfoxTSDB = require("tsdb.foxTSDB") +local ChttpHtml = require("httplib.httpHtml") + +local CbaseQuery = class("baseQuery", ChttpHtml) + +function CbaseQuery:_init_(frame, fYaml) + ChttpHtml._init_(self) + self._urlCb["/query/base"] = function(tReq) return self:base(tReq) end + self._urlCb["/query/baseQ"] = function(tReq) return self:baseQ(tReq) end + self._fox = CfoxTSDB.new(fYaml) + self:_install(frame) +end + +local function packForm1(forms) + forms[1] = '
' +end + +local function packForm2(forms) + table.insert(forms, '\n
') +end + +local function packTimeFormat(forms, session) + session.gmt = session.gmt or "0" + table.insert(forms, '') + if session.gmt == '1' then + table.insert(forms, 'GMT 时间') + table.insert(forms, '本地时间') + else + table.insert(forms, 'GMT 时间') + table.insert(forms, '本地时间') + end + table.insert(forms, '
') +end + +local formTableHead = [[ + + +
+]] +local function packTables(forms, session, tables) + session.selTable = session.selTable or tables[1] + table.insert(forms, formTableHead) + local len = #forms + for i, tbl in ipairs(tables) do + if tbl == session.selTable then + forms[i + len] = string.format('', tbl, tbl) + else + forms[i + len] = string.format('', tbl, tbl) + end + end + table.insert(forms, formTableEnd) +end + +local formTLHead = [[ + + +
+]] +local formTLIndex = {'5', '10', '20', '30', '60', '120', '240', '720', '1440'} +local formTLKV = { + ["5"] = "5m", ["10"] = "10m", ["20"] = "20m", ["30"] = "30m", ["60"] = "1h", + ["120"] = "2h", ["240"] = "4h", ["720"] = "6h", ["1440"] = "24h", +} +local function packTimeLen(forms, session) + session.timeLen = session.timeLen or "30" + table.insert(forms, formTLHead) + for _, k in ipairs(formTLIndex) do + if k == session.timeLen then + table.insert(forms, string.format('', k, formTLKV[k])) + else + table.insert(forms, string.format('', k, formTLKV[k])) + end + end + table.insert(forms, formTLEnd) +end + +local function packForm(session, tables) + local forms = {} + packForm1(forms) + packTimeFormat(forms, session) + packTables(forms, session, tables) + packTimeLen(forms, session) + packForm2(forms) + return table.concat(forms, "\n") +end + +function CbaseQuery:qTables(session, fresh) + fresh = fresh or false + local t = session.qlast or 4 * 60 + if session.tables == nil or fresh then + session.tables = self._fox:qTabelNow(t * 60) + end +end + +function CbaseQuery:base(tReq) + local res = {title="Beaver Query"} + self:qTables(tReq.session) + res.content = packForm(tReq.session, tReq.session.tables) + return res +end + +function CbaseQuery:setSession(queries, session) + if queries.selTable then + session.selTable = queries.selTable + session.gmt = queries.gmt + session.timeLen = queries.timeLen + end +end + +local function escape(s) + if type(s) == "string" then + s = system:escHtml(s) + return system:escMd(s) + end + return "None" +end + +local function packDataHead(res, labels, values, logs) + local heads = system:listMerge({"time"}, labels, values, logs) + local show_head = {} + for i, v in ipairs(heads) do + show_head[i] = escape(v) + end + table.insert(res, table.concat({"| ", table.concat(show_head, " | "), " |"})) + + local aligns = {} + table.insert(aligns, "---:") -- for time align left + for _, _ in ipairs(labels) do + table.insert(aligns, ":---") -- for labels align right + end + for _, _ in ipairs(values) do + table.insert(aligns, ":---:") -- for values align center + end + for _, _ in ipairs(logs) do + table.insert(aligns, ":---") -- for values align right + end + table.insert(res, table.concat({"| ", table.concat(aligns, " | "), " |"})) +end + + +local function packDataBody(res, fmt, ms, labels, values, logs) + local len = #res + for i, m in ipairs(ms) do + local datas = {} + local ii = 1 + if fmt then + datas[ii] = os.date("!%x %X", tonumber(m.time) / 1000000) + else + datas[ii] = os.date("%x %X", tonumber(m.time) / 1000000) + end + ii = ii + 1 + + for _, k in ipairs(labels) do + datas[ii] = escape(m.labels[k]) + ii = ii + 1 + end + for _, k in ipairs(values) do + local v = m.values[k] + if v then + datas[ii] = string.format("%7.2f", m.values[k]) + else + datas[ii] = "None" + end + ii = ii + 1 + end + for _, k in ipairs(logs) do + datas[ii] = escape(m.logs[k]) + ii = ii + 1 + end + local data = table.concat({"| ", table.concat(datas, " | "), " |"}) + res[len + i] = data + end +end + +local function packDataTabel(res, ms, tFmt) + if #ms > 0 then + local fmt = false + if tFmt == "1" then + fmt = true + end + + local labels, values, logs = {}, {}, {} + for k, _ in pairs(ms[1].labels) do + table.insert(labels, k) + end + for k, _ in pairs(ms[1].values) do + table.insert(values, k) + end + for k, _ in pairs(ms[1].logs) do + table.insert(logs, k) + end + packDataHead(res, labels, values, logs) + packDataBody(res, fmt, ms, labels, values, logs) + end +end + +function 
CbaseQuery:baseQ(tReq) + local res = {title="Beaver Query"} + local contents = {} + local session = tReq.session + + if tReq.queries then + self:setSession(tReq.queries, session) + end + + if session.selTable == nil then + contents[1] = "查询表未设置,将跳转会设置页面." + contents[2] = '' + res.content = table.concat(contents, "\n") + return res + end + + local ms = self._fox:qNow(tonumber(session.timeLen) * 60, + {session.selTable}) + table.insert(contents, "# 反馈输入\n") + table.insert(contents, "* 表名: " .. system:escMd(session.selTable)) + table.insert(contents, "* 时间戳: " .. session.gmt) + table.insert(contents, "* 时长: " .. session.timeLen) + table.insert(contents, "\n") + + table.insert(contents, "# 显示表格\n") + + packDataTabel(contents, ms, session.gmt) + + table.insert(contents, "[返回](/query/base)") + table.insert(contents, "[刷新](/query/baseQ)") + + res.content = self:markdown(table.concat(contents, "\n")) + return res +end + +return CbaseQuery diff --git a/source/tools/monitor/unity/beaver/url_api.lua b/source/tools/monitor/unity/beaver/url_api.lua index 4eab10dd5bba1622c3d1314fcdbd16fae2abd7d0..e78df15419e3a9ef16001da7425185a5c45724d0 100644 --- a/source/tools/monitor/unity/beaver/url_api.lua +++ b/source/tools/monitor/unity/beaver/url_api.lua @@ -10,13 +10,13 @@ local ChttpApp = require("httplib.httpApp") local CfoxTSDB = require("tsdb.foxTSDB") local CurlApi = class("urlApi", ChttpApp) -function CurlApi:_init_(frame) +function CurlApi:_init_(frame, fYaml) ChttpApp._init_(self) self._urlCb["/api/sum"] = function(tReq) return self:sum(tReq) end self._urlCb["/api/sub"] = function(tReq) return self:sub(tReq) end self._urlCb["/api/query"] = function(tReq) return self:query(tReq) end self:_install(frame) - self:_setupQs() + self:_setupQs(fYaml) end function CurlApi:sum(tReq) @@ -89,8 +89,8 @@ function CurlApi:qtable(tJson) return self.fox:qTabelNow(secs) end -function CurlApi:_setupQs() - self.fox = CfoxTSDB.new() +function CurlApi:_setupQs(fYaml) + self.fox = CfoxTSDB.new(fYaml) self._q = {} self._q["last"] = function(tJson) return self:qlast(tJson) end self._q["table"] = function(tJson) return self:qtable(tJson) end diff --git a/source/tools/monitor/unity/beaver/url_export_raw.lua b/source/tools/monitor/unity/beaver/url_export_raw.lua index b6c904bf177105d9f26c650d81090f0133d0348f..2629fba8a12cb52f146c31a4f191d4ff9511a726 100644 --- a/source/tools/monitor/unity/beaver/url_export_raw.lua +++ b/source/tools/monitor/unity/beaver/url_export_raw.lua @@ -14,6 +14,7 @@ function CurlExportRaw:_init_(frame, export) self._export = export self._urlCb["/export/metrics"] = function(tReq) return self:show(tReq) end + self._urlCb["/metrics"] = function(tReq) return self:show(tReq) end self:_install(frame) end diff --git a/source/tools/monitor/unity/beaver/url_guide.lua b/source/tools/monitor/unity/beaver/url_guide.lua index 1362b01e3c9530432e360dcadbee6c942fe7330a..fcb6cd5ca7bef90c427d6055e4254e1dedcce6e4 100644 --- a/source/tools/monitor/unity/beaver/url_guide.lua +++ b/source/tools/monitor/unity/beaver/url_guide.lua @@ -5,56 +5,20 @@ --- require("common.class") +local pystring = require("common.pystring") local ChttpHtml = require("httplib.httpHtml") local CurlGuide = class("CurlIndex", ChttpHtml) function CurlGuide:_init_(frame) ChttpHtml._init_(self) - self._urlCb["/guide"] = function(tReq) return self:guide(tReq) end - self._urlCb["/guide/hotplugin"] = function(tReq) return self:hotplugin(tReq) end - self._urlCb["/guide/oop"] = function(tReq) return self:oop(tReq) end - self._urlCb["/guide/pystring"] = 
function(tReq) return self:pystring(tReq) end - self._urlCb["/guide/webdevel"] = function(tReq) return self:webdevel(tReq) end - self._urlCb["/guide/proc_probe"] = function(tReq) return self:proc_probe(tReq) end - self._urlCb["/guide/dev_proc"] = function(tReq) return self:dev_proc(tReq) end - self:_install(frame) + self:_installRe("^/guide*", frame) + self._head = "/" -- need to strip + self._filePath = "../beaver/" end -local function loadFile(fPpath) - local path = "../beaver/guide/" .. fPpath - local f = io.open(path,"r") - local s = f:read("*all") - f:close() - return s -end - -function CurlGuide:guide(tReq) - return {title="guide", content=self:markdown(loadFile("guide.md"))} -end - -function CurlGuide:hotplugin(tReq) - return {title="hotplugin", content=self:markdown(loadFile("hotplugin.md"))} -end - -function CurlGuide:oop(tReq) - return {title="oop", content=self:markdown(loadFile("oop.md"))} -end - -function CurlGuide:pystring(tReq) - return {title="pystring", content=self:markdown(loadFile("pystring.md"))} -end - -function CurlGuide:webdevel(tReq) - return {title="webdevel", content=self:markdown(loadFile("webdevel.md"))} -end - -function CurlGuide:proc_probe(tReq) - return {title="proc and probes", content=self:markdown(loadFile("proc_probe.md"))} -end - -function CurlGuide:dev_proc(tReq) - return {title="develop proc interface.", content=self:markdown(loadFile("dev_proc.md"))} +function CurlGuide:callRe(tReq, keep) + return self:reSource(tReq, keep, self._head, self._filePath) end return CurlGuide diff --git a/source/tools/monitor/unity/beeQ/Makefile b/source/tools/monitor/unity/beeQ/Makefile index 1ad7ee4d865142a3767927ef99eb95014755de93..693514e9905f89376981aa363cd937eb8f4efb2e 100644 --- a/source/tools/monitor/unity/beeQ/Makefile +++ b/source/tools/monitor/unity/beeQ/Makefile @@ -2,11 +2,11 @@ LIB= -lpthread -ldl CC=gcc CFLAG := -g -I../beaver -I../collector/outline -LDFLAG := -g -lm -ldl -lrt -lpthread -lluajit-5.1 -L./lib/ -lbeeQ -L../beaver -lbeaver -L../collector/outline/ -loutline -L../collector/plugin/ -lproto_sender -L../collector/native/ -lprocffi +LDFLAG := -g -lm -ldl -lrt -lpthread -lluajit-5.1 -L./lib/ -lbeeQ -L../beaver -lbeaver -lcollectorApi -L../collector/outline/ -loutline -L../collector/plugin/ -lproto_sender PRG=unity-mon OBJ=apps.o bees.o -DEPMOD=lib ../beaver ../collector/native ../collector/outline ../collector/plugin ../tsdb/native +DEPMOD=lib ../beaver ../collector/native ../collector/interface ../collector/outline ../collector/plugin ../tsdb/native $(PRG): $(DEPMOD) $(OBJ) $(CC) $(LIB) -o $@ $(OBJ) $(LDFLAG) diff --git a/source/tools/monitor/unity/beeQ/apps.c b/source/tools/monitor/unity/beeQ/apps.c index a0cdc50ae2c5b6047d35800182378278645b4cdb..e1ea2932aadf36ba2a736813e278fb6f230ed860 100644 --- a/source/tools/monitor/unity/beeQ/apps.c +++ b/source/tools/monitor/unity/beeQ/apps.c @@ -12,25 +12,66 @@ #include #define gettidv1() syscall(__NR_gettid) -static int sample_period = 0; extern char *g_yaml_file; -LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, int level); +static int lua_traceback(lua_State *L) +{ + const char *errmsg = lua_tostring(L, -1); + lua_getglobal(L, "debug"); + lua_getfield(L, -1, "traceback"); + lua_call(L, 0, 1); + printf("%s \n%s\n", errmsg, lua_tostring(L, -1)); + return 1; +} + +int lua_reg_errFunc(lua_State *L) { + lua_pushcfunction(L, lua_traceback); + return lua_gettop(L); +} -static void report_lua_failed(lua_State *L) { - fprintf(stderr, "\nFATAL ERROR:%s\n\n", lua_tostring(L, -1)); +int 
lua_check_ret(int ret) { + switch (ret) { + case 0: + break; + case LUA_ERRRUN: + printf("lua runtime error.\n"); + break; + case LUA_ERRMEM: + printf("lua memory error.\n"); + case LUA_ERRERR: + printf("lua exec error.\n"); + case LUA_ERRSYNTAX: + printf("file syntax error.\n"); + case LUA_ERRFILE: + printf("load lua file error.\n"); + default: + printf("bad res for %d\n", ret); + exit(1); + } + return ret; } -static int call_init(lua_State *L) { +int lua_load_do_file(lua_State *L, const char* path) { + int err_func = lua_gettop(L); + int ret; + + ret = luaL_loadfile(L, path); + if (ret) { + return lua_check_ret(ret); + } + ret = lua_pcall(L, 0, LUA_MULTRET, err_func); + return lua_check_ret(ret); +} + +static int call_init(lua_State *L, int err_func) { int ret; lua_Number lret; lua_getglobal(L, "init"); lua_pushinteger(L, (int)gettidv1()); - ret = lua_pcall(L, 1, 1, 0); + lua_pushstring(L, g_yaml_file); + ret = lua_pcall(L, 2, 1, err_func); if (ret) { - perror("luaL_call init func error"); - report_lua_failed(L); goto endCall; } @@ -56,7 +97,7 @@ static int call_init(lua_State *L) { static lua_State * app_recv_init(void) { int ret; - + int err_func; /* create a state and load standard library. */ lua_State *L = luaL_newstate(); if (L == NULL) { @@ -65,19 +106,14 @@ static lua_State * app_recv_init(void) { } /* opens all standard Lua libraries into the given state. */ luaL_openlibs(L); + err_func = lua_reg_errFunc(L); - ret = luaL_dofile(L, "bees.lua"); + ret = lua_load_do_file(L, "../beeQ/bees.lua"); if (ret) { - const char *msg = lua_tostring(L, -1); - perror("luaL_dofile error"); - if (msg) { - luaL_traceback(L, L, msg, 0); - fprintf(stderr, "FATAL ERROR:%s\n\n", msg); - } goto endLoad; } - ret = call_init(L); + ret = call_init(L, err_func); if (ret < 0) { goto endCall; } @@ -112,6 +148,7 @@ int app_recv_proc(void* msg, struct beeQ* q) { int lret; lua_State *L = (lua_State *)(q->qarg); char *body; + int err_func; if (counter != sighup_counter) { // check counter for signal. lua_close(L); @@ -123,7 +160,6 @@ int app_recv_proc(void* msg, struct beeQ* q) { q->qarg = L; counter = sighup_counter; } - body = malloc(len); // http://www.lua.org/manual/5.1/manual.html#lua_pushlstring //Pushes the string pointed to by s with size len onto the stack. // Lua makes (or reuses) an internal copy of the given string, @@ -134,13 +170,13 @@ int app_recv_proc(void* msg, struct beeQ* q) { goto endMem; } memcpy(body, &pMsg->body[0], len); + err_func = lua_gettop(L); lua_getglobal(L, "proc"); lua_pushlstring(L, body, len); - ret = lua_pcall(L, 1, 1, 0); + ret = lua_pcall(L, 1, 1, err_func); free(body); if (ret) { - perror("lua call error"); - report_lua_failed(L); + lua_check_ret(ret); goto endCall; } @@ -165,6 +201,7 @@ int app_recv_proc(void* msg, struct beeQ* q) { endReturn: endCall: free(msg); + exit(1); return ret; } @@ -188,8 +225,9 @@ int collector_qout(lua_State *L) { return 1; // return a value. } -static lua_State * app_collector_init(void* q, void* proto_q) { +static int app_collector_work(void* q, void* proto_q) { int ret; + int err_func; lua_Number lret; /* create a state and load standard library. 
*/ @@ -199,29 +237,23 @@ static lua_State * app_collector_init(void* q, void* proto_q) { goto endNew; } luaL_openlibs(L); + err_func = lua_reg_errFunc(L); - ret = luaL_dofile(L, "collectors.lua"); + ret = lua_load_do_file(L, "../beeQ/collectors.lua"); if (ret) { - const char *msg = lua_tostring(L, -1); - perror("luaL_dofile error"); - if (msg) { - luaL_traceback(L, L, msg, 0); - fprintf(stderr, "FATAL ERROR:%s\n\n", msg); - } goto endLoad; } lua_register(L, "collector_qout", collector_qout); // call init. - lua_getglobal(L, "init"); + lua_getglobal(L, "work"); lua_pushlightuserdata(L, q); lua_pushlightuserdata(L, proto_q); lua_pushstring(L, g_yaml_file); - ret = lua_pcall(L, 3, 1, 0); + ret = lua_pcall(L, 3, 1, err_func); if (ret < 0) { - perror("luaL_call init func error"); - report_lua_failed(L); + lua_check_ret(ret); goto endCall; } @@ -235,117 +267,30 @@ static lua_State * app_collector_init(void* q, void* proto_q) { if (lret < 0) { errno = -EINVAL; ret = -1; - perror("collectors.lua init failed."); + perror("collectors.lua work failed."); goto endReturn; } - sample_period = lret; - printf("setup sample period %ds\n", sample_period); - return L; + lua_close(L); + return lret; endReturn: endCall: endLoad: lua_close(L); endNew: - return NULL; -} - -static int app_collector_work(lua_State **pL, void* q, void* proto_q) { - int ret; - lua_Number lret; - static int counter = 0; - - lua_State *L = *pL; - - if (counter != sighup_counter) { // check counter for signal. - lua_close(L); - - L = app_collector_init(q, proto_q); - if (L == NULL) { - exit(1); - } - *pL = L; - counter = sighup_counter; - } - - lua_getglobal(L, "work"); - lua_pushinteger(L, sample_period); - ret = lua_pcall(L, 1, 1, 0); - if (ret) { - perror("luaL_call init func error"); - report_lua_failed(L); - goto endCall; - } - - if (!lua_isnumber(L, -1)) { // check - errno = -EINVAL; - perror("function collectors.lua work must return a number."); - goto endReturn; - } - lret = lua_tonumber(L, -1); - lua_pop(L, 1); - if (lret < 0) { - errno = -EINVAL; - ret = -1; - perror("collectors.lua work failed."); - goto endReturn; - } - - return ret; - endReturn: - endCall: - return ret; -} - -#include -#include -typedef long bee_time_t; -static bee_time_t local_time(void) { - int ret; - struct timespec tp; - - ret = clock_gettime(CLOCK_MONOTONIC, &tp); - if (ret == 0) { - return tp.tv_sec * 1000000 + tp.tv_nsec / 1000; - } else { - perror("get clock failed."); - exit(1); - return 0; - } + return -1; } int app_collector_run(struct beeQ* q, void* arg) { int ret = 0; - lua_State *L; - lua_State **pL; struct beeQ* proto_que = (struct beeQ* )arg; - L = app_collector_init(q, proto_que); - if (L == NULL) { - ret = -1; - goto endInit; - } - pL = &L; - while (1) { - bee_time_t t1, t2, delta; - t1 = local_time(); - ret = app_collector_work(pL, q, proto_que); + ret = app_collector_work(q, proto_que); if (ret < 0) { - goto endLoop; - } - t2 = local_time(); - - delta = t1 + sample_period * 1000000 - t2; - - if (delta > 0) { - usleep(delta); + perror("collect work run failed."); + break; } } - - lua_close(L); - return 0; - endLoop: - endInit: return ret; } diff --git a/source/tools/monitor/unity/beeQ/bees.c b/source/tools/monitor/unity/beeQ/bees.c index c962fe73b92cd7256dbc75351d1a9daa6d6432a4..f6174e49c4641bb0fb49da394a16ec38351d0f7e 100644 --- a/source/tools/monitor/unity/beeQ/bees.c +++ b/source/tools/monitor/unity/beeQ/bees.c @@ -15,14 +15,23 @@ volatile int sighup_counter = 0; char *g_yaml_file = NULL; +static pthread_t pid_collector = 0; +static 
pthread_t pid_outline = 0; void sig_handler(int num) { printf("receive the signal %d.\n", num); - if (num == SIGHUP) { - sighup_counter ++; - } else { - exit(1); + switch (num) { + case SIGHUP: + sighup_counter ++; + pthread_kill(pid_collector, SIGUSR1); + pthread_kill(pid_outline, SIGUSR1); + break; + case SIGUSR1: // to stop + break; + default: + printf("signal %d exit.\n", num); + exit(1); } } @@ -36,6 +45,7 @@ int main(int argc, char *argv[]) { } signal(SIGHUP, sig_handler); + signal(SIGUSR1, sig_handler); signal(SIGINT, sig_handler); q = beeQ_init(RUN_QUEUE_SIZE, @@ -49,9 +59,15 @@ int main(int argc, char *argv[]) { if (proto_que == NULL) { exit(1); } - beeQ_send_thread(q, proto_que, app_collector_run); + pid_collector = beeQ_send_thread(q, proto_que, app_collector_run); + if (pid_collector == 0) { + exit(1); + } - outline_init(q, g_yaml_file); + pid_outline = outline_init(q, g_yaml_file); + if (pid_outline == 0) { + exit(1); + } beaver_init(g_yaml_file); fprintf(stderr, "loop exit."); diff --git a/source/tools/monitor/unity/beeQ/bees.lua b/source/tools/monitor/unity/beeQ/bees.lua index 70658b0440bfa50aff235df8d6e9e9b538038305..56e23285a00d249a33c31956d4ad928d68cb37ad 100644 --- a/source/tools/monitor/unity/beeQ/bees.lua +++ b/source/tools/monitor/unity/beeQ/bees.lua @@ -10,9 +10,11 @@ local CfoxRecv = require("beeQ.foxRecv") local unistd = require("posix.unistd") -local fox = CfoxRecv.new() +local fox -function init(tid) +function init(tid, fYaml) + fYaml = fYaml or "../collector/plugin.yaml" + fox = CfoxRecv.new(fYaml) print(string.format("hello beeQ, pid: %d, tid: %d", unistd.getpid(), tid)) return 0 end diff --git a/source/tools/monitor/unity/beeQ/collectors.lua b/source/tools/monitor/unity/beeQ/collectors.lua index 192d8e4fb2e35eaf3cd0dbeed9c30dd7af162202..e92a06f58f49a9eae6fe207bdadcb433f183d16c 100644 --- a/source/tools/monitor/unity/beeQ/collectors.lua +++ b/source/tools/monitor/unity/beeQ/collectors.lua @@ -5,10 +5,92 @@ --- package.path = package.path .. ";../?.lua;" -local Cloop = require("collector.loop") +local dirent = require("posix.dirent") +local unistd = require("posix.unistd") +local stat = require("posix.sys.stat") +local bit = require("bit") local system = require("common.system") +local ptime = require("posix.time") -workLoop = nil +local srcPath = "../collector/native/" +local dstPath = "../collector/lib/" + +local function listSrc(path) + local res = {} + local files = dirent.files(path) + for f in files do + if string.find(f, "%.so") then + table.insert(res, f) + end + end + return res +end + +local function checkDst() + if unistd.access(dstPath) then + local pstat = stat.stat(dstPath) + if stat.S_ISDIR(pstat.st_mode) == 0 then + error(string.format("dst %s is no a dictionary", dstPath)) + end + else + print("mkdir " .. dstPath) + local _, s, errno = stat.mkdir(dstPath) + if errno then + error(string.format("mkdir %s failed ,report %s. %d", dstPath), s, errno) + end + end +end + +local function checkSo(fPath) + local fSrc = srcPath .. fPath + local fDst = dstPath .. fPath + + if unistd.access(fDst) then + local sSrc = stat.stat(fSrc) + local sDst = stat.stat(fDst) + + if sSrc.st_mtime > sDst.st_mtime then -- modified + return true + else + return false + end + else -- exit + return true + end +end + +local function copySo(fPath) + local fSrc = srcPath .. fPath + local fDst = dstPath .. 
fPath + + local sFile, err = io.open(fSrc,"rb") + if err then + error(string.format("open file %s report %s."), fSrc, err) + end + local stream = sFile:read("*a") + sFile:close() + + local dFile, err = io.open(fDst,"wb") + if err then + error(string.format("open file %s report %s."), fDst, err) + end + dFile:write(stream) + dFile:close() + + stat.chmod(fDst, bit.bor(stat.S_IRWXU, stat.S_IRGRP, stat.S_IROTH)) +end + +local function checkSos() + print(unistd.getcwd()) + checkDst() + local so_s = listSrc(srcPath) + for _, so in ipairs(so_s) do + if checkSo(so) then + print("need copy " .. so) + copySo(so) + end + end +end local function setupFreq(fYaml) local conf = system:parseYaml(fYaml) @@ -25,14 +107,34 @@ local function setupFreq(fYaml) end end -function init(que, proto_q, yaml) - local fYaml = yaml or "../collector/plugin.yaml" - local work = Cloop.new(que, proto_q, fYaml) - workLoop = work - return setupFreq(fYaml) +local function calcSleep(hope, now) + if hope.tv_nsec >= now.tv_nsec then + return {tv_sec = hope.tv_sec - now.tv_sec, + tv_nsec = hope.tv_nsec - now.tv_nsec} + else + return {tv_sec = hope.tv_sec - now.tv_sec - 1, + tv_nsec = 1e9 + hope.tv_nsec - now.tv_nsec} + end end -function work(t) - workLoop:work(t) - return 0 +function work(que, proto_q, yaml) + local fYaml = yaml or "../collector/plugin.yaml" + checkSos() + local Cloop = require("collector.loop") + local w = Cloop.new(que, proto_q, fYaml) + local unit = setupFreq(fYaml) + local tStart = ptime.clock_gettime(ptime.CLOCK_MONOTONIC) + while true do + w:work(unit) + local now = ptime.clock_gettime(ptime.CLOCK_MONOTONIC) + local hope = {tv_sec = tStart.tv_sec + unit, tv_nsec = tStart.tv_sec} + local diff = calcSleep(hope, now) + assert(diff.tv_sec >= 0) + local _, s, errno, _ = ptime.nanosleep(diff) + if errno then -- interrupt by signal + print(string.format("new sleep stop. 
%d, %s", errno, s)) + return 0 + end + tStart = hope + end end diff --git a/source/tools/monitor/unity/beeQ/foxRecv.lua b/source/tools/monitor/unity/beeQ/foxRecv.lua index dfb52a3964e429e2e9543629de4e8ad67fea253c..5f1e88bf75b1259d2810f96aee1364d6f6605a7a 100644 --- a/source/tools/monitor/unity/beeQ/foxRecv.lua +++ b/source/tools/monitor/unity/beeQ/foxRecv.lua @@ -10,8 +10,8 @@ local CfoxTSDB = require("tsdb.foxTSDB") local CfoxRecv = class("CfoxRecv") -function CfoxRecv:_init_() - self._fox = CfoxTSDB.new() +function CfoxRecv:_init_(fYaml) + self._fox = CfoxTSDB.new(fYaml) self._fox:setupWrite() end diff --git a/source/tools/monitor/unity/beeQ/lib/beeQ.c b/source/tools/monitor/unity/beeQ/lib/beeQ.c index 74e91a6bbab779fba60273c1a02532e447b9a207..a60afa85102112d997a212adca304f41f874da15 100644 --- a/source/tools/monitor/unity/beeQ/lib/beeQ.c +++ b/source/tools/monitor/unity/beeQ/lib/beeQ.c @@ -298,5 +298,5 @@ pthread_t beeQ_send_thread(struct beeQ *q, void *sarg, int (*cb)(struct beeQ *q, failThread: free(msg); failMalloc: - return res; + return 0; } diff --git a/source/tools/monitor/unity/beeQ/pack.sh b/source/tools/monitor/unity/beeQ/pack.sh index 7595760e157e44dc302e348ac11c6216ad01ce3a..8b67eb5b27edde594401459c1807865cece862ea 100755 --- a/source/tools/monitor/unity/beeQ/pack.sh +++ b/source/tools/monitor/unity/beeQ/pack.sh @@ -22,7 +22,9 @@ cp -r /usr/local/share/lua/5.1/* ${DIST}/lua/ mkdir ${APP} mkdir ${APP}/beaver mkdir ${APP}/beaver/native +mkdir ${APP}/beaver/query cp -r beaver/guide ${APP}/beaver/ +cp -r beaver/query ${APP}/beaver/ cp beaver/*.lua ${APP}/beaver/ cp beaver/native/*.lua ${APP}/beaver/native @@ -35,9 +37,11 @@ cp beeQ/run.sh ${APP}/beeQ/ mkdir ${APP}/collector mkdir ${APP}/collector/native +mkdir ${APP}/collector/outline cp collector/native/*.so* ${APP}/collector/native/ cp collector/native/*.lua ${APP}/collector/native/ cp collector/*.lua ${APP}/collector/ +cp collector/outline/*.lua ${APP}/collector/outline cp collector/plugin.yaml ${APP}/collector/ mkdir ${APP}/common diff --git a/source/tools/monitor/unity/beeQ/proto_queue.lua b/source/tools/monitor/unity/beeQ/proto_queue.lua index f1988d89f251ea5a18d6e96a06de31248f91ac7a..ecece0f1ab04fbcc11a73fb2cec5fb63f4cfdceb 100644 --- a/source/tools/monitor/unity/beeQ/proto_queue.lua +++ b/source/tools/monitor/unity/beeQ/proto_queue.lua @@ -21,12 +21,14 @@ function CprotoQueue:que() end function CprotoQueue:load_label(unity_line, line) + local c = #line.ls for i=0, 4 - 1 do local name = self._ffi.string(unity_line.indexs[i].name) local index = self._ffi.string(unity_line.indexs[i].index) if #name > 0 then - table.insert(line.ls, {name = name, index = index}) + c = c + 1 + line.ls[c] = {name = name, index = index} else return end @@ -34,12 +36,14 @@ function CprotoQueue:load_label(unity_line, line) end function CprotoQueue:load_value(unity_line, line) + local c = #line.vs for i=0, 32 - 1 do local name = self._ffi.string(unity_line.values[i].name) local value = unity_line.values[i].value if #name > 0 then - table.insert(line.vs, {name = name, value = value}) + c = c + 1 + line.vs[c] = {name = name, value = value} else return end @@ -56,7 +60,8 @@ function CprotoQueue:load_log(unity_line, line) end function CprotoQueue:_proc(unity_lines, lines) - for i=0, unity_lines.num - 1 do + local c = #lines["lines"] + for i = 0, unity_lines.num - 1 do local unity_line = unity_lines.line[i] local line = {line = self._ffi.string(unity_line.table), ls = {}, @@ -66,7 +71,8 @@ function CprotoQueue:_proc(unity_lines, lines) 
self:load_label(unity_line, line) self:load_value(unity_line, line) self:load_log(unity_line, line) - table.insert(lines["lines"], line) + c = c + 1 + lines["lines"][c] = line end end diff --git a/source/tools/monitor/unity/beeQ/run.sh b/source/tools/monitor/unity/beeQ/run.sh index 0221f07c45f9aa1daae62dcbc37ceec5d5341950..9c23dcdd510933a747a26ef07e111ea320f80d48 100755 --- a/source/tools/monitor/unity/beeQ/run.sh +++ b/source/tools/monitor/unity/beeQ/run.sh @@ -9,4 +9,8 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../install/ export LUA_PATH="../../lua/?.lua;../../lua/?/init.lua;" export LUA_CPATH="../../lib/?.so;../../lib/loadall.so;" -./unity-mon +yaml_path=$1 +[ ! $yaml_path ] && yaml_path="/etc/sysak/plugin.yaml" + +echo $yaml_yaml_path +./unity-mon $yaml_path diff --git a/source/tools/monitor/unity/collector/conPlugin.lua b/source/tools/monitor/unity/collector/conPlugin.lua new file mode 100644 index 0000000000000000000000000000000000000000..3709afa173f0b9db4f053e50e4a0b5717058b0fe --- /dev/null +++ b/source/tools/monitor/unity/collector/conPlugin.lua @@ -0,0 +1,19 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/10 8:26 AM +--- + +require("common.class") +local system = require("common.system") +local CconPlugin = class("conPlugin") + +function CconPlugin:_init_(proto, procffi, que, proto_q, fYaml) + self._proto = proto + self._que = que + self._plugins = setup() +end + +function CconPlugin:proc(elapsed, lines) + +end \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/interface/Makefile b/source/tools/monitor/unity/collector/interface/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4b0dcf387a8656a4a4d29e4bf6b45bf306ac14ba --- /dev/null +++ b/source/tools/monitor/unity/collector/interface/Makefile @@ -0,0 +1,19 @@ +CC := gcc +CFLAG := -g -fpic +LDFLAG := -g -fpic -shared +OBJS := unity_interface.o sig_stop.o fastKsym.o +SO := libcollectorApi.so + +all: $(SO) install + +%.o: %.c + $(CC) -c $< -o $@ $(CFLAG) + +$(SO): $(OBJS) + $(CC) -o $@ $(OBJS) $(LDFLAG) + +install: $(SO) + cp $(SO) ../../beeQ/lib + +clean: + rm -f $(SO) $(OBJS) \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/interface/fastKsym.c b/source/tools/monitor/unity/collector/interface/fastKsym.c new file mode 100644 index 0000000000000000000000000000000000000000..611bf87139a7cab0aafb01a5ae3a4be423aca76a --- /dev/null +++ b/source/tools/monitor/unity/collector/interface/fastKsym.c @@ -0,0 +1,173 @@ +// +// Created by 廖肇燕 on 2022/12/18. 
+// + +#include "fastKsym.h" +#include +#include +#include +#include +#include +#include +#include +#include + +static int tfd = 0; +static int sym_cnt = 0; +static struct ksym_cell * gCell = NULL; + +static int load_ksyms(int fd, int stack_only) { + int ret = 0; + int count = 0; + struct ksym_cell cell; + void * addr; + char buf[128]; + + FILE *pf = fopen("/proc/kallsyms", "r"); + + if (pf == NULL) { + ret = -errno; + fprintf(stderr, "open /proc/kallsyms failed, errno, %d, %s", errno, strerror(errno)); + goto endOpen; + } + + while (!feof(pf)) { + if (!fgets(buf, sizeof(buf), pf)) + break; + + ret = sscanf(buf, "%p %c %64s %31s", &addr, &cell.type, cell.func, cell.module); + if (ret == 3) { + cell.module[0] = '\0'; + } else if (ret < 3) { + fprintf(stderr, "bad kallsyms line: %s", buf); + goto endRead; + } + + if (!addr) + continue; + + if (stack_only && (cell.type != 't') && (cell.type != 'T')) { + continue; + } + cell.addr = (addr_t) addr; + + ret = write(fd, &cell, sizeof (cell)); + if (ret < 0) { + fprintf(stderr, "write file failed, errno, %d, %s", errno, strerror(errno)); + goto endWrite; + } + count ++; + } + + fclose(pf); + return count; + + endWrite: + endRead: + fclose(pf); + endOpen: + return ret; +} + +static int sym_cmp(const void *p1, const void *p2) +{ + return ((struct ksym_cell *)p1)->addr > ((struct ksym_cell *)p2)->addr; +} + +static int sort_ksym(int fd, int count) { + int ret = 0 ; + struct stat sb; + void *pmmap; + + ret = fstat(fd, &sb); + if (ret < 0) { + fprintf(stderr, "fstat file failed, errno, %d, %s", errno, strerror(errno)); + goto endStat; + } + + pmmap = mmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (pmmap == NULL) { + fprintf(stderr, "mmap file failed, errno, %d, %s", errno, strerror(errno)); + ret = -EACCES; + goto endMmap; + } + + qsort(pmmap, count, sizeof (struct ksym_cell), sym_cmp); + + madvise(pmmap, sb.st_size, MADV_DONTNEED); + madvise(pmmap, sb.st_size, MADV_NORMAL); + gCell = (struct ksym_cell*)pmmap; + + return ret; + endMmap: + endStat: + return ret; +} + +int ksym_setup(int stack_only) { + int ret; + + FILE *pf = tmpfile(); + if (pf == NULL) { + ret = -errno; + fprintf(stderr, "open file failed, errno, %d, %s", errno, strerror(errno)); + goto endTmpfile; + } + + tfd = fileno(pf); + + ret = load_ksyms(tfd, stack_only); + if (ret < 0) { + goto endLoad; + } + sym_cnt = ret; + + ret = sort_ksym(tfd, ret); + if (ret < 0) { + goto endSort; + } + + return ret; + endSort: + endLoad: + close(tfd); + endTmpfile: + return ret; +} + +void ksym_free(void) { + munmap((void *)gCell, sym_cnt * sizeof (struct ksym_cell)); + close(tfd); +} + +struct ksym_cell* ksym_search(addr_t key) { + int start = 0, end = sym_cnt; + int mid; + + if (sym_cnt <= 0) { + printf("sym_cnt: %d", sym_cnt); + return NULL; + } + + while (start < end) { + mid = start + (end - start) / 2; + + if (key < gCell[mid].addr) { + end = mid; + } else if (key > gCell[mid].addr) { + start = mid + 1; + } else { + return &gCell[mid]; + } + } + + if (start > 0) { + if ((gCell[start - 1].addr < key) && (key < gCell[start].addr)) { + return &gCell[start - 1]; + } + } + if (start == sym_cnt) { + return &gCell[end - 1]; + } + return NULL; +} diff --git a/source/tools/monitor/unity/collector/interface/fastKsym.h b/source/tools/monitor/unity/collector/interface/fastKsym.h new file mode 100644 index 0000000000000000000000000000000000000000..31ddebae34c47407a10d791d04ac93f229fe62ba --- /dev/null +++ b/source/tools/monitor/unity/collector/interface/fastKsym.h @@ -0,0 +1,21 @@ +// 
+// Created by 廖肇燕 on 2022/12/18. +// + +#ifndef FASTKSYM_FASTKSYM_H +#define FASTKSYM_FASTKSYM_H + +typedef unsigned long addr_t; + +struct ksym_cell { + addr_t addr; + char func[64]; + char module[31]; + char type; +}; + +int ksym_setup(int stack_only); +void ksym_free(void); +struct ksym_cell* ksym_search(addr_t key); + +#endif //FASTKSYM_FASTKSYM_H diff --git a/source/tools/monitor/unity/collector/native/sig_stop.c b/source/tools/monitor/unity/collector/interface/sig_stop.c similarity index 50% rename from source/tools/monitor/unity/collector/native/sig_stop.c rename to source/tools/monitor/unity/collector/interface/sig_stop.c index ec3d865b26dc7de4cdbfc5c1f4090505880811fa..769e743f8cd3dfdd3dac774b1e230630771f508b 100644 --- a/source/tools/monitor/unity/collector/native/sig_stop.c +++ b/source/tools/monitor/unity/collector/interface/sig_stop.c @@ -4,8 +4,11 @@ #include "sig_stop.h" #include - #include +#include +#include +#include +#include "fastKsym.h" static volatile int working = 1; @@ -19,8 +22,8 @@ void plugin_stop(void) { void plugin_thread_stop(pthread_t tid) { if (tid > 0) { - printf("send sig stop to thread %lu\n", tid); - pthread_kill(tid, SIGQUIT); + printf("send sig user2 to thread %lu\n", tid); + pthread_kill(tid, SIGUSR2); pthread_join(tid, NULL); } } @@ -35,10 +38,29 @@ static void sig_register(void) { action.sa_handler = stop_signal_handler; sigemptyset(&action.sa_mask); action.sa_flags = 0; - sigaction(SIGQUIT, &action, NULL); + sigaction(SIGUSR2, &action, NULL); +} + +static void bump_memlock_rlimit1(void) +{ + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) { + fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n"); + exit(1); + } } void plugin_init(void) { + bump_memlock_rlimit1(); + ksym_setup(1); sig_register(); working = 1; } + +void plugin_deinit(void) { + ksym_free(); +} diff --git a/source/tools/monitor/unity/collector/native/sig_stop.h b/source/tools/monitor/unity/collector/interface/sig_stop.h similarity index 100% rename from source/tools/monitor/unity/collector/native/sig_stop.h rename to source/tools/monitor/unity/collector/interface/sig_stop.h diff --git a/source/tools/monitor/unity/collector/native/unity_interface.c b/source/tools/monitor/unity/collector/interface/unity_interface.c similarity index 99% rename from source/tools/monitor/unity/collector/native/unity_interface.c rename to source/tools/monitor/unity/collector/interface/unity_interface.c index 0c6d3139d5785d14f1d4344154c3ba9dafa8af8d..41876791812da97c5f519728c12a3138e59390f9 100644 --- a/source/tools/monitor/unity/collector/native/unity_interface.c +++ b/source/tools/monitor/unity/collector/interface/unity_interface.c @@ -36,4 +36,4 @@ char *get_unity_proc(void) { char *get_unity_sys(void) { return unity_sys; -} +} \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/native/unity_interface.h b/source/tools/monitor/unity/collector/interface/unity_interface.h similarity index 72% rename from source/tools/monitor/unity/collector/native/unity_interface.h rename to source/tools/monitor/unity/collector/interface/unity_interface.h index 5bb0b670c4d17d8d785eb06f56a503352a6fd49a..b1bd06352517964ea7224db20a4505f8394338d9 100644 --- a/source/tools/monitor/unity/collector/native/unity_interface.h +++ b/source/tools/monitor/unity/collector/interface/unity_interface.h @@ -5,6 +5,8 @@ #ifndef UNITY_UNITY_INTERFACE_H #define UNITY_UNITY_INTERFACE_H +void set_unity_proc(const char *path); 
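The fastKsym helpers added here load /proc/kallsyms once into a sorted, mmap-backed table, so each address lookup becomes a binary search instead of a file scan. A minimal usage sketch based only on the prototypes in fastKsym.h; the symbolize() wrapper and the sample address are illustrative and not part of this patch:

```c
// Illustrative use of the fastKsym API declared above (not part of the patch).
#include <stdio.h>
#include "fastKsym.h"

static void symbolize(addr_t addr)          /* hypothetical helper */
{
    struct ksym_cell *cell = ksym_search(addr);

    if (cell)
        printf("%#lx -> %s %s\n", addr, cell->func, cell->module);
    else
        printf("%#lx -> unknown\n", addr);
}

int main(void)
{
    if (ksym_setup(1) < 0)                  /* 1: text symbols only, as plugin_init() does */
        return 1;
    symbolize(0xffffffff81000000UL);        /* example address only */
    ksym_free();
    return 0;
}
```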
+void set_unity_sys(const char *path); char *get_unity_proc(void); char *get_unity_sys(void); diff --git a/source/tools/monitor/unity/collector/kvProc.lua b/source/tools/monitor/unity/collector/kvProc.lua index 6f3bc3e4dfd462bb62a53ad89b2682be14e5c88b..f5ac8fac78f48de5e0697974f7c582cc282da53b 100644 --- a/source/tools/monitor/unity/collector/kvProc.lua +++ b/source/tools/monitor/unity/collector/kvProc.lua @@ -46,7 +46,7 @@ function CkvProc:proc(elapsed, lines) self:readKV(line) end self:appendLine(self._protoTable) - return self:push(lines) + self:push(lines) end return CkvProc \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/loop.lua b/source/tools/monitor/unity/collector/loop.lua index 1b2781815c62048f500f0497c54ee623e985d429..28b23464f21b2a12d2dd6a6682dbac7510cc2f34 100644 --- a/source/tools/monitor/unity/collector/loop.lua +++ b/source/tools/monitor/unity/collector/loop.lua @@ -7,19 +7,7 @@ require("common.class") local CprotoData = require("common.protoData") local procffi = require("collector.native.procffi") - -local CprocStat = require("collector.proc_stat") -local CprocMeminfo = require("collector.proc_meminfo") -local CprocVmstat = require("collector.proc_vmstat") -local CprocNetdev = require("collector.proc_netdev") -local CprocDiskstats = require("collector.proc_diskstats") -local CprocSockStat = require("collector.proc_sockstat") -local CprocSnmpStat = require("collector.proc_snmp_stat") -local CprocMounts = require("collector.proc_mounts") -local CprocStatm = require("collector.proc_statm") -local CprocBuddyinfo = require("collector.proc_buddyinfo") local Cplugin = require("collector.plugin") - local system = require("common.system") local Cloop = class("loop") @@ -27,28 +15,29 @@ local Cloop = class("loop") function Cloop:_init_(que, proto_q, fYaml) local res = system:parseYaml(fYaml) self._proto = CprotoData.new(que) - self._procs = { - CprocStat.new(self._proto, procffi, res.config.proc_path), - CprocMeminfo.new(self._proto, procffi, res.config.proc_path), - CprocVmstat.new(self._proto, procffi, res.config.proc_path), - CprocNetdev.new(self._proto, procffi, res.config.proc_path), - CprocDiskstats.new(self._proto, procffi, res.config.proc_path), - CprocSockStat.new(self._proto, procffi, res.config.proc_path), - CprocSnmpStat.new(self._proto, procffi, res.config.proc_path), - CprocMounts.new(self._proto, procffi, res.config.proc_path), - CprocStatm.new(self._proto, procffi, res.config.proc_path), - CprocBuddyinfo.new(self._proto, procffi, res.config.proc_path), - } + self:loadLuaPlugin(res, res.config.proc_path) self._plugin = Cplugin.new(self._proto, procffi, que, proto_q, fYaml) end +function Cloop:loadLuaPlugin(res, proc_path) + local luas = res.luaPlugins + + self._procs = {} + if res.luaPlugins then + for i, plugin in ipairs(luas) do + local CProcs = require("collector." .. plugin) + self._procs[i] = CProcs.new(self._proto, procffi, proc_path) + end + end + print("add " .. #self._procs .. 
" lua plugin.") +end + function Cloop:work(t) local lines = self._proto:protoTable() - for k, obj in pairs(self._procs) do - lines = obj:proc(t, lines) + for _, obj in pairs(self._procs) do + obj:proc(t, lines) end - lines = self._plugin:proc(t, lines) - --print(#lines.lines) + self._plugin:proc(t, lines) local bytes = self._proto:encode(lines) self._proto:que(bytes) end diff --git a/source/tools/monitor/unity/collector/native/Makefile b/source/tools/monitor/unity/collector/native/Makefile index 03d3d2f896ef24e04cd05bbb42b30b30553beed7..b738b35e1b53fdbbb46d6ccda2cc7b58fc38eabb 100644 --- a/source/tools/monitor/unity/collector/native/Makefile +++ b/source/tools/monitor/unity/collector/native/Makefile @@ -1,7 +1,7 @@ CC := gcc CFLAG := -g -fpic LDFLAG := -g -fpic -shared -OBJS := procffi.o sig_stop.o unity_interface.o +OBJS := procffi.o ffi_unity_api.o SO := libprocffi.so all: $(SO) diff --git a/source/tools/monitor/unity/collector/native/ffi_unity_api.c b/source/tools/monitor/unity/collector/native/ffi_unity_api.c new file mode 100644 index 0000000000000000000000000000000000000000..35d9a69a7d1cad3471be6003e6d8f808ed03f1bf --- /dev/null +++ b/source/tools/monitor/unity/collector/native/ffi_unity_api.c @@ -0,0 +1,27 @@ +// +// Created by 廖肇燕 on 2023/3/5. +// + +#include "ffi_unity_api.h" +#include "../interface/unity_interface.h" +#include "../interface/sig_stop.h" + +void ffi_set_unity_proc(const char *path) { + set_unity_proc(path); +} + +void ffi_set_unity_sys(const char *path) { + set_unity_sys(path); +} + +void ffi_plugin_init(void) { + plugin_init(); +} + +void ffi_plugin_stop(void) { + plugin_stop(); +} + +void ffi_plugin_deinit(void) { + plugin_deinit(); +} diff --git a/source/tools/monitor/unity/collector/native/ffi_unity_api.h b/source/tools/monitor/unity/collector/native/ffi_unity_api.h new file mode 100644 index 0000000000000000000000000000000000000000..758d8f4b70a49f790d870fb473d4bec377117879 --- /dev/null +++ b/source/tools/monitor/unity/collector/native/ffi_unity_api.h @@ -0,0 +1,14 @@ +// +// Created by 廖肇燕 on 2023/3/5. 
+// + +#ifndef UNITY_FFI_UNITY_API_H +#define UNITY_FFI_UNITY_API_H + +void ffi_set_unity_proc(const char *path); +void ffi_set_unity_sys(const char *path); +void ffi_plugin_init(void); +void ffi_plugin_stop(void); +void ffi_plugin_deinit(void); + +#endif //UNITY_FFI_UNITY_API_H diff --git a/source/tools/monitor/unity/collector/native/plugincffi.lua b/source/tools/monitor/unity/collector/native/plugincffi.lua index dccb08a4be62d614f5a3d7388fead58d9e82a3f5..037ba320191235b2a7cdead8373e7c9cdbbffbf7 100644 --- a/source/tools/monitor/unity/collector/native/plugincffi.lua +++ b/source/tools/monitor/unity/collector/native/plugincffi.lua @@ -38,6 +38,7 @@ int call(int t, struct unity_lines* lines); void deinit(void); void free(void *p); +int setns(int fd, int nstype); ]] return ffi diff --git a/source/tools/monitor/unity/collector/native/procffi.lua b/source/tools/monitor/unity/collector/native/procffi.lua index 85a2ed6a79f470107b75260ee33d9c8b327ee2b7..d7e64a3b5f5c50bdf157454200300832921f06ab 100644 --- a/source/tools/monitor/unity/collector/native/procffi.lua +++ b/source/tools/monitor/unity/collector/native/procffi.lua @@ -27,12 +27,11 @@ int var_input_long(const char * line, struct var_long *p); int var_input_string(const char * line, struct var_string *p); int var_input_kvs(const char * line, struct var_kvs *p); -void set_unity_proc(const char *path); -void set_unity_sys(const char *path); - -int plugin_is_working(void); -void plugin_stop(void); -void plugin_init(void); +void ffi_set_unity_proc(const char *path); +void ffi_set_unity_sys(const char *path); +void ffi_plugin_init(void); +void ffi_plugin_stop(void); +void ffi_plugin_deinit(void); ]] return {ffi = ffi, cffi=cffi} diff --git a/source/tools/monitor/unity/collector/outline/outline.c b/source/tools/monitor/unity/collector/outline/outline.c index 753049affaf550a81e25baad459c30047caf1b11..90ec914fe35f5f92996083b52bad70c30f670cb7 100644 --- a/source/tools/monitor/unity/collector/outline/outline.c +++ b/source/tools/monitor/unity/collector/outline/outline.c @@ -5,23 +5,19 @@ #include "outline.h" #include -LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, int level); +extern int lua_reg_errFunc(lua_State *L); +extern int lua_check_ret(int ret); +int lua_load_do_file(lua_State *L, const char* path); -static void report_lua_failed(lua_State *L) { - fprintf(stderr, "\nFATAL ERROR:%s\n\n", lua_tostring(L, -1)); -} - -static int call_init(lua_State *L, void* q, char *fYaml) { +static int call_init(lua_State *L, int err_func, void* q, char *fYaml) { int ret; lua_Number lret; lua_getglobal(L, "init"); lua_pushlightuserdata(L, q); lua_pushstring(L, fYaml); - ret = lua_pcall(L, 2, 1, 0); + ret = lua_pcall(L, 2, 1, err_func); if (ret) { - perror("luaL_call init func error"); - report_lua_failed(L); goto endCall; } @@ -48,6 +44,7 @@ static int call_init(lua_State *L, void* q, char *fYaml) { extern int collector_qout(lua_State *L); static lua_State * pipe_init(void* q, char *fYaml) { int ret; + int err_func; lua_Number lret; /* create a state and load standard library. 
*/ @@ -57,21 +54,17 @@ static lua_State * pipe_init(void* q, char *fYaml) { goto endNew; } luaL_openlibs(L); + err_func = lua_reg_errFunc(L); - ret = luaL_dofile(L, "outline.lua"); + ret = lua_load_do_file(L, "../beeQ/outline.lua"); if (ret) { - const char *msg = lua_tostring(L, -1); - perror("luaL_dofile error"); - if (msg) { - luaL_traceback(L, L, msg, 0); - fprintf(stderr, "FATAL ERROR:%s\n\n", msg); - } goto endLoad; } lua_register(L, "collector_qout", collector_qout); - ret = call_init(L, q, fYaml); - if (ret < 0) { + ret = call_init(L, err_func, q, fYaml); + if (ret) { + lua_check_ret(ret); goto endCall; } return L; @@ -85,13 +78,14 @@ static lua_State * pipe_init(void* q, char *fYaml) { static int work(lua_State *L) { int ret; + int err_func; lua_Number lret; + err_func = lua_gettop(L); lua_getglobal(L, "work"); - ret = lua_pcall(L, 0, 1, 0); + ret = lua_pcall(L, 0, 1, err_func); if (ret) { - perror("lua call error"); - report_lua_failed(L); + lua_check_ret(ret); goto endCall; } @@ -108,10 +102,12 @@ static int work(lua_State *L) { perror("beaver.lua echo failed."); goto endReturn; } + lua_close(L); return ret; endReturn: endCall: + lua_close(L); return ret; } @@ -139,8 +135,8 @@ static int outline_run(struct beeQ* q, void* arg) { return ret; } -int outline_init(struct beeQ* pushQ, char *fYaml) { - pthread_t tid; +pthread_t outline_init(struct beeQ* pushQ, char *fYaml) { + pthread_t tid = 0; tid = beeQ_send_thread(pushQ, fYaml, outline_run); return tid; diff --git a/source/tools/monitor/unity/collector/outline/outline.h b/source/tools/monitor/unity/collector/outline/outline.h index 4f82428bf0cbc6419cfbd72a7cbe4816ec7fccf3..2bbe30ea1950dd0d0a70aca368285812a9fd8074 100644 --- a/source/tools/monitor/unity/collector/outline/outline.h +++ b/source/tools/monitor/unity/collector/outline/outline.h @@ -8,6 +8,6 @@ #include "../../beeQ/beeQ.h" #include #include -int outline_init(struct beeQ* pushQ, char *fYaml); +pthread_t outline_init(struct beeQ* pushQ, char *fYaml); #endif //UNITY_OUTLINE_H diff --git a/source/tools/monitor/unity/collector/outline/pipeMon.lua b/source/tools/monitor/unity/collector/outline/pipeMon.lua index 1298192d7326ae3241c5523890517b17cea40be2..81f2069698ef18cb43d89eff15d38ae33415f5bf 100644 --- a/source/tools/monitor/unity/collector/outline/pipeMon.lua +++ b/source/tools/monitor/unity/collector/outline/pipeMon.lua @@ -35,11 +35,11 @@ end function CpipeMon:setupPipe(fYaml) local res = system:parseYaml(fYaml) - for _, path in ipairs(res.outline) do + for i, path in ipairs(res.outline) do if unistd.access(path) then unistd.unlink(path) end - table.insert(self._paths, path) + self._paths[i] = path socket.unix = require("socket.unix") local s = socket.unix.udp() @@ -58,14 +58,20 @@ local function trans(title, ls, vs, log) local values = {} local logs = {} + local c = 0 for k, v in pairs(ls) do - table.insert(labels, {name=k, index=v}) + c = c + 1 + labels[c] = {name=k, index=v} end + c = 0 for k, v in pairs(vs) do - table.insert(values, {name=k, value=v}) + c = c + 1 + values[c] = {name=k, value=v} end + c = 0 for k, v in pairs(log) do - table.insert(logs, {name=k, log=v}) + c = c + 1 + logs[c] = {name=k, log=v} end return {line = title, ls = labels, vs = values, log = logs} end diff --git a/source/tools/monitor/unity/collector/plugin.lua b/source/tools/monitor/unity/collector/plugin.lua index 41b217465bd1bb10dc5815c4375feeb7d9468a0b..57823c0fec1cbf98c51ad7be78fa62fefc495208 100644 --- a/source/tools/monitor/unity/collector/plugin.lua +++ 
b/source/tools/monitor/unity/collector/plugin.lua @@ -15,26 +15,27 @@ function Cplugin:_init_(proto, procffi, que, proto_q, fYaml) self:setProcSys(procffi, res.config) self._sig_cffi = procffi["cffi"] - self._sig_cffi.plugin_init() + self._sig_cffi.ffi_plugin_init() self._ffi = require("collector.native.plugincffi") self:setup(res.plugins, proto_q) end function Cplugin:_del_() - self._sig_cffi.plugin_stop() + self._sig_cffi.ffi_plugin_stop() for _, plugin in ipairs(self._plugins) do local cffi = plugin.cffi cffi.deinit() end + self._sig_cffi.ffi_plugin_deinit() end function Cplugin:setProcSys(procFFI, config) local proc = config["proc_path"] or "/" local sys = config["sys_path"] or "/" - procFFI.cffi.set_unity_proc(procFFI.ffi.string(proc)) - procFFI.cffi.set_unity_sys(procFFI.ffi.string(sys)) + procFFI.cffi.ffi_set_unity_proc(procFFI.ffi.string(proc)) + procFFI.cffi.ffi_set_unity_sys(procFFI.ffi.string(sys)) end function Cplugin:setup(plugins, proto_q) @@ -55,12 +56,14 @@ function Cplugin:setup(plugins, proto_q) end function Cplugin:load_label(unity_line, line) + local c = #line.ls for i=0, 4 - 1 do local name = self._ffi.string(unity_line.indexs[i].name) local index = self._ffi.string(unity_line.indexs[i].index) if #name > 0 then - table.insert(line.ls, {name = name, index = index}) + c = c + 1 + line.ls[c] = {name = name, index = index} else return end @@ -68,12 +71,14 @@ function Cplugin:load_label(unity_line, line) end function Cplugin:load_value(unity_line, line) + local c = #line.vs for i=0, 32 - 1 do local name = self._ffi.string(unity_line.values[i].name) local value = unity_line.values[i].value if #name > 0 then - table.insert(line.vs, {name = name, value = value}) + c = c + 1 + line.vs[c] = {name = name, value = value} else return end @@ -90,6 +95,7 @@ function Cplugin:load_log(unity_line, line) end function Cplugin:_proc(unity_lines, lines) + local c = #lines["lines"] for i=0, unity_lines.num - 1 do local unity_line = unity_lines.line[i] local line = {line = self._ffi.string(unity_line.table), @@ -100,7 +106,8 @@ function Cplugin:_proc(unity_lines, lines) self:load_label(unity_line, line) self:load_value(unity_line, line) self:load_log(unity_line, line) - table.insert(lines["lines"], line) + c = c + 1 + lines["lines"][c] = line end end @@ -114,7 +121,6 @@ function Cplugin:proc(t, lines) end self._ffi.C.free(unity_lines.line) -- should free memory. end - return lines end return Cplugin diff --git a/source/tools/monitor/unity/collector/plugin.yaml b/source/tools/monitor/unity/collector/plugin.yaml index d622f53a952933331db79fac1998cb4ffaaa295a..2b9821f6e91b3de2a92ba9c0bf84e22ace2e8b36 100644 --- a/source/tools/monitor/unity/collector/plugin.yaml +++ b/source/tools/monitor/unity/collector/plugin.yaml @@ -4,15 +4,27 @@ config: bind_addr: 0.0.0.0 # bind ip backlog: 32 # listen backlog identity: # support hostip, curl(need url arg), hostname, file(need path arg), specify(need name arg) - mode: specify - name: test_specify + mode: curl + url: "http://100.100.100.200/latest/meta-data/instance-id" +# name: test_specify # mode: hostip - proc_path: /mnt/home/ # in container mode, like -v /:/mnt/host , should use /mnt/host/ +# real_timestamps: true +# unix_socket: "/tmp/sysom_unity.sock" + proc_path: /mnt/host/ # in container mode, like -v /:/mnt/host , should use /mnt/host/ # proc_path: / # in container mode, like -v /:/mnt/host , should use /mnt/host/ + db: + rotate: 7 # tsdb file retention time, unit day + budget: 200 # max query buffer from tsdb. 
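Each `so:` entry under `plugins:` below names a shared object exporting the small C ABI used throughout this patch: `init()`, `call()`, `deinit()` plus the `unity_*` line helpers seen in bpfsample.c. A rough sketch of such a plugin reading load average through the configured `proc_path`; the include set, the `"%sproc/loadavg"` path join and the numeric value handling are assumptions, not the actual proc_loadavg source:

```c
// Sketch of a collector plugin following the init/call/deinit ABI used by the
// plugins listed below. Illustrative only; include paths are assumptions.
#include <stdio.h>
#include "../plugin_head.h"          // unity_* helpers; get_unity_proc() may also need unity_interface.h

int init(void *arg)
{
    (void)arg;                       // no state needed for this sketch
    return 0;
}

int call(int t, struct unity_lines *lines)
{
    char path[256];
    double load1 = 0.0;
    struct unity_line *line;
    FILE *fp;

    (void)t;
    // get_unity_proc() returns the configured proc_path prefix; the join below is an assumption.
    snprintf(path, sizeof(path), "%sproc/loadavg", get_unity_proc());
    fp = fopen(path, "r");
    if (!fp)
        return 0;                    // skip this round on error
    if (fscanf(fp, "%lf", &load1) != 1)
        load1 = 0.0;
    fclose(fp);

    unity_alloc_lines(lines, 1);     // reserve one output line for this period
    line = unity_get_line(lines, 0);
    unity_set_table(line, "example_loadavg");
    unity_set_value(line, 0, "load1", load1);  // assuming numeric values, as in bpfsample.c
    return 0;
}

void deinit(void)
{
}
```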
outline: - /tmp/sysom +luaPlugins: ["proc_buddyinfo", "proc_diskstats", "proc_meminfo", "proc_mounts", "proc_netdev", + "proc_snmp_stat", "proc_sockstat", "proc_stat", "proc_statm", "proc_vmstat", + "proc_uptime"] + + + plugins: - so: kmsg description: "collect dmesg info." @@ -22,9 +34,9 @@ plugins: - so: sample_threads description: "threads example." -# - -# so: bpfsample2 -# description: "bpf threads example." + - + so: bpfsample2 + description: "bpf threads example." - so: proc_schedstat @@ -33,6 +45,26 @@ plugins: so: proc_loadavg description: "collect load avg" + - + so: unity_nosched + description: "nosched:sys hold cpu and didn't scheduling" + - so: net_health + description: "tcp net health." + - so: net_retrans + description: "tcp retrans monitor." + - + so: unity_irqoff + description: "irqoff:detect irq turned off and can't response" + #- + # so: numainfo + # description: "collect numainfo" + # - + # so: cpufreq + # description: "collect cpufreq" + - + so: gpuinfo + description: "collect gpuinfo" + metrics: - title: sysak_proc_cpu_total @@ -123,5 +155,66 @@ metrics: - title: sysak_proc_buddyinfo from: buddyinfo head: value - help: "buddyinfo of system from /proc/loadavg" + help: "buddyinfo of system from /proc/buddyinfo" + type: "gauge" + - title: sysak_IOMonIndForDisksIO + from: IOMonIndForDisksIO + head: value + help: "Disk IO indicators and abnormal events" + type: "gauge" + - title: sysak_IOMonIndForSystemIO + from: IOMonIndForSystemIO + head: value + help: "System indicators and abnormal events about IO" + type: "gauge" + - title: sysak_IOMonDiagLog + from: IOMonDiagLog + head: value + help: "Diagnose log for IO exception" + type: "gauge" + - title: sched_moni_jitter + from: sched_moni_jitter + head: value + help: "nosched/irqoff:sys and irqoff hold cpu and didn't scheduling" + type: "gauge" + - title: sysak_cpu_dist + from: cpu_dist + head: value + help: "task cpu sched dist." 
+ type: "gauge" + - title: sysak_net_health_hist + from: net_health_hist + head: value + help: "net_health_hist" type: "gauge" + - title: sysak_net_health_count + from: net_health_count + head: value + help: "net_health_count" + type: "gauge" + - title: sysak_net_retrans_count + from: net_retrans_count + head: value + help: "net_retrans_count" + type: "gauge" + #- title: sysak_numainfo + # from: numainfo + # head: value + # help: "numainfo of system from /sys/devices/system/" + # type: "gauge" + # - title: sysak_proc_cpufreq + # from: cpufreq + # head: value + # help: "cpufreq of system from /proc/cpuinfo" + # type: "gauge" + - title: sysak_gpuinfo + from: gpuinfo + head: value + help: "gpuinfo of system from nvidia-smi" + type: "gauge" + + #- title: sysak_pod_alloc + #from: pod_alloc + #head: value + #help: "get pod alloc page used" + #type: "gauge" diff --git a/source/tools/monitor/unity/collector/plugin/Makefile b/source/tools/monitor/unity/collector/plugin/Makefile index 94f8322c6261e7c71f1c302624498a493644575f..794d89005bf7d4e1833a0297dc0553181f20d4ae 100644 --- a/source/tools/monitor/unity/collector/plugin/Makefile +++ b/source/tools/monitor/unity/collector/plugin/Makefile @@ -4,7 +4,9 @@ LDFLAG := -g -fpic -shared OBJS := proto_sender.o LIB := libproto_sender.a -DEPMOD=sample threads kmsg proc_schedstat proc_loadavg bpfsample2 + +DEPMOD=sample threads kmsg proc_schedstat proc_loadavg unity_nosched unity_irqoff cpudist net_health net_retrans netlink cpufreq gpuinfo + all: $(LIB) $(DEPMOD) diff --git a/source/tools/monitor/unity/collector/plugin/bpf_head.h b/source/tools/monitor/unity/collector/plugin/bpf_head.h index 6acf256f68c6902a7ea1604f3df1d4a5586788ad..f02896f3a17539f0d6f4df5026d5371eb8ae4939 100644 --- a/source/tools/monitor/unity/collector/plugin/bpf_head.h +++ b/source/tools/monitor/unity/collector/plugin/bpf_head.h @@ -4,15 +4,22 @@ #ifndef UNITY_BPF_HEAD_H #define UNITY_BPF_HEAD_H +#include -#define DEFINE_SEKL_OBJECT(skel_name) \ - struct skel_name##_bpf *skel_name = NULL; \ - static pthread_t perf_thread = 0; +#ifdef COOLBPF_PERF_THREAD -#define DESTORY_SKEL_BOJECT(skel_name) \ - if (perf_thread > 0) \ - kill_perf_thread(perf_thread); \ - skel_name##_bpf__destroy(skel_name) +#define DEFINE_SEKL_OBJECT(skel_name) \ + struct skel_name##_bpf *skel_name = NULL; \ + static pthread_t perf_thread = 0; \ + int thread_worker(struct beeQ *q, void *arg) \ + { \ + perf_thread_worker(arg); \ + return 0; \ + } \ + void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) \ + { \ + printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); \ + } #define LOAD_SKEL_OBJECT(skel_name, perf) \ ( \ @@ -40,7 +47,7 @@ DESTORY_SKEL_BOJECT(skel_name); \ goto load_bpf_skel_out; \ } \ - struct perf_thread_arguments *perf_args = malloc(sizeof(struct perf_thread_arguments)); \ + struct perf_thread_arguments *perf_args = calloc(1, sizeof(struct perf_thread_arguments)); \ if (!perf_args) \ { \ __ret = -ENOMEM; \ @@ -57,4 +64,51 @@ __ret; \ }) +#define DESTORY_SKEL_BOJECT(skel_name) \ + if (perf_thread != 0) \ + plugin_thread_stop(perf_thread); \ + skel_name##_bpf__destroy(skel_name); +#else +#define DEFINE_SEKL_OBJECT(skel_name) \ + struct skel_name##_bpf *skel_name = NULL; + +#define LOAD_SKEL_OBJECT(skel_name, perf) \ + ( \ + { \ + __label__ load_bpf_skel_out; \ + int __ret = 0; \ + skel_name = skel_name##_bpf__open(); \ + if (!skel_name) \ + { \ + printf("failed to open BPF object\n"); \ + __ret = -1; \ + goto load_bpf_skel_out; \ + } \ + __ret = skel_name##_bpf__load(skel_name); \ + if (__ret) \ + 
{ \ + printf("failed to load BPF object: %d\n", __ret); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + __ret = skel_name##_bpf__attach(skel_name); \ + if (__ret) \ + { \ + printf("failed to attach BPF programs: %s\n", strerror(-__ret)); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + load_bpf_skel_out: \ + __ret; \ + }) + +#define DESTORY_SKEL_BOJECT(skel_name) \ + skel_name##_bpf__destroy(skel_name); +#endif + +#define coobpf_map_find(OBJ, NAME) bpf_object__find_map_fd_by_name(OBJ, NAME) +#define coobpf_key_next(FD, KEY, NEXT) bpf_map_get_next_key(FD, KEY, NEXT) +#define coobpf_key_value(FD, KEY, VALUE) bpf_map_lookup_elem(FD, KEY, VALUE) + +#include "plugin_head.h" #endif //UNITY_BPF_HEAD_H diff --git a/source/tools/monitor/unity/collector/plugin/bpfsample/Makefile b/source/tools/monitor/unity/collector/plugin/bpfsample/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..0b8e79a20ed2d65dd829f77bf3be3032ea35e280 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/bpfsample/Makefile @@ -0,0 +1,8 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := bpfsample.bpf.c +csrcs := bpfsample.c +so := libbpfsample.so + +include ../bpfso.mk \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/plugin/bpfsample/bpfsample.bpf.c b/source/tools/monitor/unity/collector/plugin/bpfsample/bpfsample.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..12556f2a0323c461d9396571defc7ddaa135ccf5 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/bpfsample/bpfsample.bpf.c @@ -0,0 +1,23 @@ + + +#include +#include +#include "bpfsample.h" + + + +BPF_ARRAY(count, u64, 200); + +SEC("kprobe/netstat_seq_show") +int BPF_KPROBE(netstat_seq_show, struct sock *sk, struct msghdr *msg, size_t size) +{ + int default_key = 0; + u64 *value = bpf_map_lookup_elem(&count, &default_key); + if (value) { + __sync_fetch_and_add(value, 1); + } + return 0; +} + + + diff --git a/source/tools/monitor/unity/collector/plugin/bpfsample/bpfsample.c b/source/tools/monitor/unity/collector/plugin/bpfsample/bpfsample.c new file mode 100644 index 0000000000000000000000000000000000000000..5bd980b1d1abe2e25adec116a86d2370a09ff30a --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/bpfsample/bpfsample.c @@ -0,0 +1,53 @@ + + +#include "bpfsample.h" +#include "bpfsample.skel.h" + +#include +#include +#include "../../../../unity/beeQ/beeQ.h" + +struct coolbpf_object *cb = NULL; +int countfd = 0; + +int init(void *arg) +{ + cb = coolbpf_object_new(bpfsample); + if (!cb) { + printf("Failed to create coolbpf object\n"); + return -EINVAL; + } + + countfd = coolbpf_object_find_map(cb, "count"); + if (countfd < 0) { + printf("Failed to get count map fd\n"); + return countfd; + } + printf("bpfsample plugin install.\n"); + printf("count map fd is %d\n", countfd); + return 0; +} + +int call(int t, struct unity_lines *lines) +{ + int default_key = 0; + uint64_t count = 0; + uint64_t default_count = 0; + struct unity_line* line; + + bpf_map_lookup_elem(countfd, &default_key, &count); + bpf_map_update_elem(countfd, &default_key, &default_count, BPF_ANY); + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + unity_set_table(line, "bpfsample"); + unity_set_value(line, 0, "value", count); + + return 0; +} + +void deinit(void) +{ + printf("bpfsample plugin uninstall.\n"); + coolbpf_object_destroy(cb); +} diff --git a/source/tools/monitor/unity/collector/plugin/bpfsample/bpfsample.h 
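/*
 * Illustrative sketch (not part of the patch): bpfsample.c above reads the
 * single-slot `count` map once per collection interval and immediately writes
 * zero back, so each call() reports only the events seen since the previous
 * call. The read-and-reset step in isolation, using the same libbpf calls:
 */
#include <stdint.h>
#include <bpf/bpf.h>

static uint64_t fetch_and_reset(int map_fd)
{
    uint32_t key = 0;
    uint64_t value = 0, zero = 0;

    bpf_map_lookup_elem(map_fd, &key, &value);          /* cumulative count so far */
    bpf_map_update_elem(map_fd, &key, &zero, BPF_ANY);  /* restart the interval    */
    return value;
}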
b/source/tools/monitor/unity/collector/plugin/bpfsample/bpfsample.h new file mode 100644 index 0000000000000000000000000000000000000000..15dcaa720db3321281d9c7a7fbbded604e26d875 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/bpfsample/bpfsample.h @@ -0,0 +1,16 @@ + + +#ifndef BPF_SAMPLE_H +#define BPF_SAMPLE_H + +#ifndef __VMLINUX_H__ + +#include "../plugin_head.h" + +int init(void *arg); +int call(int t, struct unity_lines *lines); +void deinit(void); + +#endif + +#endif diff --git a/source/tools/monitor/unity/collector/plugin/cpudist/Makefile b/source/tools/monitor/unity/collector/plugin/cpudist/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..9fc693eb4513feaebc49b5ccf1143651a8a192a3 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/cpudist/Makefile @@ -0,0 +1,8 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := cpudist.bpf.c +csrcs := cpudist.c +so := libcpudist.so + +include ../bpfso.mk \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/plugin/cpudist/cpudist.bpf.c b/source/tools/monitor/unity/collector/plugin/cpudist/cpudist.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..1dc01053bd26eb1e96b899b233c1b17012c8b2db --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/cpudist/cpudist.bpf.c @@ -0,0 +1,40 @@ +// +// Created by 廖肇燕 on 2023/2/23. +// + +#include +#include + +BPF_ARRAY(cpudist, u64, 20); +BPF_HASH(start, u32, u64, 128 * 1024); + +struct sched_switch_args { + u16 type; + u8 flag; + u8 preeempt; + u32 c_pid; + char prev_comm[16]; + u32 prev_pid; + u32 prev_prio; + u64 prev_state; + char next_comm[16]; + u32 next_pid; + u32 next_prio; +}; +SEC("tracepoint/sched/sched_switch") +int sched_switch_hook(struct sched_switch_args *args){ + u64 ts = ns(); + u64 *pv; + u32 prev = args->prev_pid; + u32 next = args->next_pid; + + if (next > 0) { + bpf_map_update_elem(&start, &next, &ts, BPF_ANY); + } + pv = bpf_map_lookup_elem(&start, &prev); + if (pv && ts > *pv) { + hist10_push((struct bpf_map_def *)&cpudist, (ts - *pv) / 1000); + } + return 0; +} + diff --git a/source/tools/monitor/unity/collector/plugin/cpudist/cpudist.c b/source/tools/monitor/unity/collector/plugin/cpudist/cpudist.c new file mode 100644 index 0000000000000000000000000000000000000000..5ea28bacb5c54b54cf2852f898c8352843461953 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/cpudist/cpudist.c @@ -0,0 +1,86 @@ +// +// Created by 廖肇燕 on 2023/2/23. 
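/*
 * Illustrative note on cpudist.bpf.c above: sched_switch_hook() timestamps a
 * task when it is switched in (keyed by next_pid) and, when the same pid is
 * switched out, pushes its on-CPU time in microseconds into the 20-slot
 * `cpudist` array via hist10_push() from the coolbpf headers. Assuming
 * hist10_push() buckets by powers of ten, slot selection works roughly as in
 * this sketch (hist10_slot is a hypothetical name, shown only to make the
 * us1/us10/.../s1 column layout concrete):
 */
static inline int hist10_slot(unsigned long us)
{
    int slot = 0;

    while (us >= 10 && slot < 19) {   /* slot 0: <10us, slot 1: <100us, ... */
        us /= 10;
        slot++;
    }
    return slot;
}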
+// + + +#include "cpudist.h" +#include "../bpf_head.h" +#include "cpudist.skel.h" + +#define CPU_DIST_INDEX 8 +#define DIST_ARRAY_SIZE 20 +DEFINE_SEKL_OBJECT(cpudist); +static int dist_fd = 0; + +int init(void *arg) +{ + int ret; + printf("cpudist plugin install.\n"); + ret = LOAD_SKEL_OBJECT(cpudist, perf); + dist_fd = coobpf_map_find(cpudist->obj, "cpudist"); + return ret; +} + +static int get_dist(unsigned long *locals) { + int i = 0; + unsigned long value = 0; + int key, key_next; + + key = 0; + while (coobpf_key_next(dist_fd, &key, &key_next) == 0) { + coobpf_key_value(dist_fd, &key_next, &value); + locals[i ++] = value; + if (i > DIST_ARRAY_SIZE) { + break; + } + key = key_next; + } + return i; +} + +static int cal_dist(unsigned long* values) { + int i, j; + int size; + static unsigned long rec[DIST_ARRAY_SIZE] = {0}; + unsigned long locals[DIST_ARRAY_SIZE]; + + size = get_dist(locals); + for (i = 0; i < CPU_DIST_INDEX - 1; i ++) { + values[i] = locals[i] - rec[i]; + rec[i] = locals[i]; + } + j = i; + values[j] = 0; + for (; i < size; i ++) { + values[j] += locals[i] - rec[i]; + rec[i] = locals[i]; + } + return 0; +} + + +int call(int t, struct unity_lines *lines) +{ + int i; + unsigned long values[CPU_DIST_INDEX]; + const char *names[] = {"us1", "us10", "us100", "ms1", "ms10", "ms100", "s1", "so"}; + struct unity_line* line; + + unity_alloc_lines(lines, 1); // 预分配好 + line = unity_get_line(lines, 0); + unity_set_table(line, "cpu_dist"); + + cal_dist(values); + for (i = 0; i < CPU_DIST_INDEX; i ++ ) { + unity_set_value(line, i, names[i], values[i]); + } + + return 0; +} + +void deinit(void) +{ + printf("cpudist plugin uninstall.\n"); + DESTORY_SKEL_BOJECT(cpudist); +} + diff --git a/source/tools/monitor/unity/collector/plugin/cpudist/cpudist.h b/source/tools/monitor/unity/collector/plugin/cpudist/cpudist.h new file mode 100644 index 0000000000000000000000000000000000000000..1bcac9a4871119def3f042e6becc1a343e2ce298 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/cpudist/cpudist.h @@ -0,0 +1,8 @@ +// +// Created by 廖肇燕 on 2023/2/23. +// + +#ifndef UNITY_CPUDIST_H +#define UNITY_CPUDIST_H + +#endif //UNITY_CPUDIST_H diff --git a/source/tools/monitor/unity/collector/plugin/cpufreq/Makefile b/source/tools/monitor/unity/collector/plugin/cpufreq/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..9f44037aeab5b8194ef8f151f60e2bb39fa3754c --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/cpufreq/Makefile @@ -0,0 +1,19 @@ +CC := gcc +CFLAG := -g -fpic +LDFLAG := -g -fpic -shared +OBJS := cpufreq.o +SO := libcpufreq.so + +all: $(SO) install + +%.o: %.c + $(CC) -c $< -o $@ $(CFLAG) + +$(SO): $(OBJS) + $(CC) -o $@ $(OBJS) $(LDFLAG) + +install: $(SO) + cp $(SO) ../../native/ + +clean: + rm -f $(SO) $(OBJS) diff --git a/source/tools/monitor/unity/collector/plugin/cpufreq/cpufreq.c b/source/tools/monitor/unity/collector/plugin/cpufreq/cpufreq.c new file mode 100644 index 0000000000000000000000000000000000000000..fe224ec44dd6b450dff7611b8e2062e0de116fac --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/cpufreq/cpufreq.c @@ -0,0 +1,73 @@ +// +// Created by muya. 
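/*
 * cal_dist() above converts the cumulative histogram into per-interval counts
 * by subtracting the previous snapshot held in a static array, and folds every
 * bucket from index CPU_DIST_INDEX - 1 upward into the final "so" column. The
 * same fold as a stand-alone sketch (fold_deltas is a hypothetical name):
 */
static void fold_deltas(const unsigned long *now, unsigned long *prev,
                        unsigned long *out, int keep, int total)
{
    int i;

    for (i = 0; i < keep - 1; i++) {      /* first keep-1 buckets pass through */
        out[i] = now[i] - prev[i];
        prev[i] = now[i];
    }
    out[keep - 1] = 0;
    for (; i < total; i++) {              /* tail collapses into the last slot */
        out[keep - 1] += now[i] - prev[i];
        prev[i] = now[i];
    }
}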
+// + +#include "cpufreq.h" +#include +#include + +int init(void * arg) { + printf("cpufreq plugin install, proc: %s\n", get_unity_proc()); + return 0; +} + +int call(int t, struct unity_lines* lines) { + struct unity_line* line; + int ret; + FILE *fp = NULL; + char str[128]; + int len = 0; + char result[16] = {0}; + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + unity_set_table(line, "cpufreq"); + + errno = 0; + + if ((fp = fopen("/proc/cpuinfo", "r")) == NULL) { + ret = errno; + printf("WARN: numainfo install FAIL fopen\n"); + return ret; + } + while (fgets(str, sizeof(str), fp)) { + char *pLast = strstr(str, "@"); + if (NULL != pLast) { + pLast = pLast + 2; + while (*pLast != 'G') + { + len++; + pLast++; + } + memcpy(result, pLast-len, len); + // printf("res is %s\n", result); + unity_set_value(line, 0, "hardware_freq", atof(result)*1000); + memset(result, 0, 16); + len = 0; + } else { + char *pLast = strstr(str, "MHz"); + char *pLast2 = strstr(str, ":"); + if (NULL != pLast && NULL != pLast2) { + pLast2 = pLast2 + 2; + while (*pLast2 != '\n') + { + len++; + pLast2++; + } + memcpy(result, pLast2-len, len); + // printf("res2 is %s, %d\n", result, len); + unity_set_value(line, 1, "curr_freq", atof(result)); + memset(result, 0, 16); + len = 0; + break; + } + } + } + if (fp) + fclose(fp); + return 0; +} + +void deinit(void) { + printf("cpufreq plugin uninstall\n"); +} diff --git a/source/tools/monitor/unity/collector/plugin/cpufreq/cpufreq.h b/source/tools/monitor/unity/collector/plugin/cpufreq/cpufreq.h new file mode 100644 index 0000000000000000000000000000000000000000..669bdeff986f16279d8d66044e16b881b6450400 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/cpufreq/cpufreq.h @@ -0,0 +1,14 @@ +// +// Created by muya. +// + +#ifndef UNITY_CPUFREQ_H +#define UNITY_CPUFREQ_H + +#include "../plugin_head.h" + +int init(void * arg); +int call(int t, struct unity_lines* lines); +void deinit(void); + +#endif //UNITY_CPUFREQ_H diff --git a/source/tools/monitor/unity/collector/plugin/gpuinfo/Makefile b/source/tools/monitor/unity/collector/plugin/gpuinfo/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..bc149c0617530a760146c049202f5a2cac39e287 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/gpuinfo/Makefile @@ -0,0 +1,19 @@ +CC := gcc +CFLAG := -g -fpic +LDFLAG := -g -fpic -shared +OBJS := gpuinfo.o +SO := libgpuinfo.so + +all: $(SO) install + +%.o: %.c + $(CC) -c $< -o $@ $(CFLAG) + +$(SO): $(OBJS) + $(CC) -o $@ $(OBJS) $(LDFLAG) + +install: $(SO) + cp $(SO) ../../native/ + +clean: + rm -f $(SO) $(OBJS) diff --git a/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.c b/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.c new file mode 100644 index 0000000000000000000000000000000000000000..943f68fe0a482a9fe2941368b3091a27db23f926 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.c @@ -0,0 +1,67 @@ +// +// Created by muya. 
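/*
 * cpufreq.c above pulls two numbers out of /proc/cpuinfo: the nominal
 * frequency after the '@' in the "model name" line (GHz, reported as MHz via
 * *1000) and the running frequency from the "cpu MHz" line. The manual pointer
 * walk can also be expressed with sscanf(); a sketch assuming the common x86
 * /proc/cpuinfo layout (parse_cpuinfo_line is a hypothetical name):
 */
#include <stdio.h>
#include <string.h>

static int parse_cpuinfo_line(const char *line, double *hw_mhz, double *cur_mhz)
{
    const char *at = strchr(line, '@');
    double ghz;

    if (at && sscanf(at + 1, " %lfGHz", &ghz) == 1) {
        *hw_mhz = ghz * 1000.0;            /* "model name : ... @ 2.50GHz" */
        return 1;
    }
    if (sscanf(line, "cpu MHz : %lf", cur_mhz) == 1)
        return 2;                          /* "cpu MHz        : 2499.998" */
    return 0;
}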
+// + +#include "gpuinfo.h" +#include + + +int init(void * arg) { + printf("gpuinfo plugin install, proc: %s\n", get_unity_proc()); + return 0; +} + +int call(int t, struct unity_lines* lines) { + + + + FILE *fp = NULL; + char buffer[256]; /* Temporary buffer for parsing */ + float mm_total, mm_used, mm_free, temp, powerdraw, gpu_util, mem_util; + struct unity_line* line; + + + // make sure nvidia-smi installed + // if use container, use -v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi + if ( access("/usr/bin/nvidia-smi",0) ) { + // printf("nvidia-smi not exists\n"); + return 0; + } + + fp = popen("nvidia-smi --query-gpu=\"memory.total,memory.used,memory.free,temperature.gpu,power.draw,utilization.gpu,utilization.memory\" --format=nounits,csv,noheader", "r"); + memset(buffer, 0, sizeof(buffer)); + + // // for test + // char command[128]; + // if (sprintf(command, "cat %s%s", get_unity_proc(), "/proc/gpuinfo") < 0) + // printf("sprintf error\n"); + // fp = popen(command, "r"); + + + if (fp != NULL) + { + while (fgets(buffer, sizeof(buffer), fp)) + { + sscanf(buffer, "%f, %f, %f, %f, %f, %f, %f", &mm_total, &mm_used, &mm_free, &temp, &powerdraw, &gpu_util, &mem_util); + } + pclose(fp); + } + + unity_alloc_lines(lines, 1); // 预分配好 + line = unity_get_line(lines, 0); + unity_set_table(line, "gpuinfo"); + unity_set_index(line, 0, "gpu_num", "gpu0"); + unity_set_value(line, 0, "mm_total", mm_total); + unity_set_value(line, 1, "mm_used", mm_used); + unity_set_value(line, 2, "mm_free", mm_free); + unity_set_value(line, 3, "temp", temp); + unity_set_value(line, 4, "powerdraw", powerdraw); + unity_set_value(line, 5, "gpu_util", gpu_util); + unity_set_value(line, 6, "mem_util", mem_util); + + return 0; +} + +void deinit(void) { + printf("gpuinfo plugin uninstall\n"); +} diff --git a/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.h b/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.h new file mode 100644 index 0000000000000000000000000000000000000000..b2d574622562b32f9ebc1a7e17e0d5c4d93d60c7 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.h @@ -0,0 +1,14 @@ +// +// Created by muya. +// + +#ifndef UNITY_GPUINFO_H +#define UNITY_GPUINFO_H + +#include "../plugin_head.h" + +int init(void * arg); +int call(int t, struct unity_lines* lines); +void deinit(void); + +#endif //UNITY_GPUINFO_H diff --git a/source/tools/monitor/unity/collector/plugin/net_health/Makefile b/source/tools/monitor/unity/collector/plugin/net_health/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..55801edc4973f417eed7f907603430c89156f8c8 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/net_health/Makefile @@ -0,0 +1,8 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := net_health.bpf.c +csrcs := net_health.c +so := libnet_health.so + +include ../bpfso.mk \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/plugin/net_health/net_health.bpf.c b/source/tools/monitor/unity/collector/plugin/net_health/net_health.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..32aacfb70da732f31b8395890edd8b0e9ec266fb --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/net_health/net_health.bpf.c @@ -0,0 +1,22 @@ +// +// Created by 廖肇燕 on 2023/2/24. 
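/*
 * gpuinfo.c above shells out to nvidia-smi and sscanf()s one CSV line per GPU;
 * only the last line survives, and the floats stay uninitialized when no line
 * parses. A stricter variant as a sketch (read_gpu_sample and struct
 * gpu_sample are hypothetical; field order follows the --query-gpu list used
 * above):
 */
#include <stdio.h>

struct gpu_sample { float total, used, free, temp, power, gpu_util, mem_util; };

static int read_gpu_sample(FILE *fp, struct gpu_sample *s)
{
    char buf[256];

    while (fgets(buf, sizeof(buf), fp)) {
        if (sscanf(buf, "%f, %f, %f, %f, %f, %f, %f",
                   &s->total, &s->used, &s->free, &s->temp,
                   &s->power, &s->gpu_util, &s->mem_util) == 7)
            return 0;                  /* first well-formed line wins */
    }
    return -1;                         /* no parsable output, e.g. no GPU */
}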
+// +#include +#include + +BPF_HASH(outCnt, int, u64, 2); +BPF_ARRAY(netHist, u64, 20); + +SEC("kprobe/tcp_validate_incoming") +int j_tcp_validate_incoming(struct pt_regs *ctx) { + struct tcp_sock *tp = (struct tcp_sock *)PT_REGS_PARM1(ctx); + u64 ts = BPF_CORE_READ(tp, srtt_us) >> 3; + u64 ms = ts / 1000; + if (ms > 0) { + add_hist((struct bpf_map_def *)&outCnt, 0, ms); + add_hist((struct bpf_map_def *)&outCnt, 1, 1); + hist10_push((struct bpf_map_def *)&netHist, ms); + } + return 0; +} + diff --git a/source/tools/monitor/unity/collector/plugin/net_health/net_health.c b/source/tools/monitor/unity/collector/plugin/net_health/net_health.c new file mode 100644 index 0000000000000000000000000000000000000000..de4a53093227d335c06e0c2d11b6306a57830f80 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/net_health/net_health.c @@ -0,0 +1,116 @@ +// +// Created by 廖肇燕 on 2023/2/24. +// + +#include "net_health.h" +#include "../bpf_head.h" +#include "net_health.skel.h" + +#define NET_DIST_INDEX 4 +#define DIST_ARRAY_SIZE 20 + +DEFINE_SEKL_OBJECT(net_health); +static int cnt_fd = 0; +static int dist_fd = 0; + +//#define ZHAOYAN_DEBUG + +int init(void *arg) +{ + int ret; + printf("net_health plugin install.\n"); + ret = LOAD_SKEL_OBJECT(net_health, perf); + cnt_fd = coobpf_map_find(net_health->obj, "outCnt"); + dist_fd = coobpf_map_find(net_health->obj, "netHist"); + return ret; +} + +static int get_dist(unsigned long *locals) { + int i = 0; + unsigned long value = 0; + int key, key_next; + + key = 0; + while (coobpf_key_next(dist_fd, &key, &key_next) == 0) { + coobpf_key_value(dist_fd, &key_next, &value); + locals[i ++] = value; + if (i > DIST_ARRAY_SIZE) { + break; + } + key = key_next; + } +#ifdef ZHAOYAN_DEBUG + for (i = 0; i < NET_DIST_INDEX; i ++) { + printf("%ld, ", locals[i]); + } + printf("\n"); +#endif + return i; +} + +static int cal_dist(unsigned long* values) { + int i, j; + int size; + static unsigned long rec[DIST_ARRAY_SIZE] = {0}; + unsigned long locals[DIST_ARRAY_SIZE]; + + size = get_dist(locals); + for (i = 0; i < NET_DIST_INDEX - 1; i ++) { + values[i] = locals[i] - rec[i]; + rec[i] = locals[i]; + } + j = i; + values[j] = 0; + for (; i < size; i ++) { + values[j] += locals[i] - rec[i]; + rec[i] = locals[i]; + } + return 0; +} + +static int get_count(unsigned long* values) { + int key; + static unsigned long rec[2]; + unsigned long now[2]; + + key = 0; + coobpf_key_value(cnt_fd, &key, &now[0]); + key = 1; + coobpf_key_value(cnt_fd, &key, &now[1]); + + values[0] = now[0] - rec[0]; rec[0] = now[0]; + values[1] = now[1] - rec[1]; rec[1] = now[1]; + return 0; +} + +int call(int t, struct unity_lines *lines) +{ + int i; + unsigned long values[NET_DIST_INDEX]; + const char *names[] = { "ms10", "ms100", "s1", "so"}; + struct unity_line* line; + + unity_alloc_lines(lines, 2); // 预分配好 + line = unity_get_line(lines, 0); + unity_set_table(line, "net_health_hist"); + + cal_dist(values); + for (i = 0; i < NET_DIST_INDEX; i ++ ) { + unity_set_value(line, i, names[i], values[i]); + } + + get_count(values); + line = unity_get_line(lines, 1); + unity_set_table(line, "net_health_count"); + unity_set_value(line, 0, "sum", values[0]); + unity_set_value(line, 1, "count", values[1]); + return 0; +} + +void deinit(void) +{ + printf("net_health plugin uninstall.\n"); + DESTORY_SKEL_BOJECT(net_health); +} + + diff --git a/source/tools/monitor/unity/collector/plugin/net_health/net_health.h b/source/tools/monitor/unity/collector/plugin/net_health/net_health.h new file mode 100644 index 
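/*
 * net_health.bpf.c above samples tcp_validate_incoming(): srtt_us holds the
 * smoothed RTT in 1/8-microsecond units, so ">> 3" yields microseconds and the
 * /1000 milliseconds. outCnt accumulates a (sum, count) pair and netHist a
 * log10 histogram; since net_health.c exports per-interval deltas of sum and
 * count, the mean RTT for an interval is simply:
 */
static double avg_rtt_ms(unsigned long sum_delta, unsigned long cnt_delta)
{
    return cnt_delta ? (double)sum_delta / (double)cnt_delta : 0.0;   /* ms */
}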
0000000000000000000000000000000000000000..dd3cebc2333a98d3765226f6aa95566843829310 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/net_health/net_health.h @@ -0,0 +1,8 @@ +// +// Created by 廖肇燕 on 2023/2/24. +// + +#ifndef UNITY_NET_HEALTH_H +#define UNITY_NET_HEALTH_H + +#endif //UNITY_NET_HEALTH_H diff --git a/source/tools/monitor/unity/collector/plugin/net_retrans/Makefile b/source/tools/monitor/unity/collector/plugin/net_retrans/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..09638c6ef30c5b23189d99750dda9e86fed08183 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/net_retrans/Makefile @@ -0,0 +1,8 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := net_retrans.bpf.c +csrcs := net_retrans.c +so := libnet_retrans.so + +include ../bpfso.mk \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.bpf.c b/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..d9d9428b10db56ea0f3ebf5928f53f4a2be464b7 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.bpf.c @@ -0,0 +1,282 @@ +// +// Created by 廖肇燕 on 2023/2/24. +// + +#include +#include +#include "net_retrans.h" + +struct liphdr { + __u8 ver_hdl; + __u8 tos; + __be16 tot_len; + __be16 id; + __be16 frag_off; + __u8 ttl; + __u8 protocol; + __sum16 check; + __be32 saddr; + __be32 daddr; +}; + +#define MAX_ENTRY 128 +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) + +BPF_PERF_OUTPUT(perf, 1024); +BPF_STACK_TRACE(stack, MAX_ENTRY); +BPF_HASH(outCnt, int, u64, NET_RETRANS_TYPE_MAX); + +static inline void addCnt(int k, u64 val) { + u64 *pv = bpf_map_lookup_elem(&outCnt, &k); + if (pv) { + __sync_fetch_and_add(pv, val); + } +} + +static inline int get_tcp_info(struct data_t* pdata, struct tcp_sock *ts) +{ + pdata->rcv_nxt = BPF_CORE_READ(ts, rcv_nxt); + pdata->rcv_wup = BPF_CORE_READ(ts, rcv_wup); + pdata->snd_nxt = BPF_CORE_READ(ts, snd_nxt); + pdata->snd_una = BPF_CORE_READ(ts, snd_una); + pdata->copied_seq = BPF_CORE_READ(ts, copied_seq); + pdata->snd_wnd = BPF_CORE_READ(ts, snd_wnd); + pdata->rcv_wnd = BPF_CORE_READ(ts, rcv_wnd); + + pdata->lost_out = BPF_CORE_READ(ts, lost_out); + pdata->packets_out = BPF_CORE_READ(ts, packets_out); + pdata->retrans_out = BPF_CORE_READ(ts, retrans_out); + pdata->sacked_out = BPF_CORE_READ(ts, sacked_out); + pdata->reordering = BPF_CORE_READ(ts, reordering); + return 0; +} + +static inline int get_skb_info(struct data_t* pdata, struct sk_buff *skb, u32 type) +{ + u16 offset; + u8 ihl; + void* head; + struct liphdr *piph; + struct tcphdr *ptcph; + + pdata->type = type; + pdata->sk_state = 0; + + head = (void*)BPF_CORE_READ(skb, head); + offset = BPF_CORE_READ(skb, network_header); + piph = (struct liphdr *)(head + offset); + ihl = _(piph->ver_hdl) & 0x0f; + ptcph = (struct tcphdr *)((void *)piph + ihl * 4); + + pdata->ip_dst = _(piph->daddr); + pdata->dport = BPF_CORE_READ(ptcph, dest); + pdata->ip_src = _(piph->saddr); + pdata->sport = BPF_CORE_READ(ptcph, source); + return 0; +} + +static inline void get_list_task(struct list_head* phead, struct data_t* e) { + struct list_head *next = BPF_CORE_READ(phead, next); + if (next) { + wait_queue_entry_t *entry = container_of(next, wait_queue_entry_t, entry); + struct poll_wqueues *pwq = (struct 
poll_wqueues *)BPF_CORE_READ(entry, private); + if (pwq) + { + struct task_struct* tsk = (struct task_struct*)BPF_CORE_READ(pwq, polling_task); + if (tsk) { + e->pid = BPF_CORE_READ(tsk, pid); + bpf_probe_read(&e->comm[0], TASK_COMM_LEN, &tsk->comm[0]); + } + } + } +} + +static inline void get_sock_task(struct sock *sk, struct data_t* e) { + struct socket_wq *wq = BPF_CORE_READ(sk, sk_wq); + if (wq) { + struct list_head* phead = (struct list_head*)((char *)wq + offsetof(struct socket_wq, wait.head)); + get_list_task(phead, e); + } +} + +static inline void get_task(struct data_t* pdata, struct sock *sk) { + pdata->pid = 0; + pdata->comm[0] = '\0'; + + get_sock_task(sk, pdata); +} + +static inline int get_info(struct data_t* pdata, struct sock *sk, u32 type) +{ + struct inet_sock *inet = (struct inet_sock *)sk; + + pdata->type = type; + pdata->ip_dst = BPF_CORE_READ(sk, __sk_common.skc_daddr); + pdata->dport = BPF_CORE_READ(sk, __sk_common.skc_dport); + pdata->ip_src = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + pdata->sport = BPF_CORE_READ(inet, inet_sport); + pdata->sk_state = BPF_CORE_READ(sk, __sk_common.skc_state); + return 0; +} + +static inline int check_inner(unsigned int ip) +{ + int i; + const unsigned int array[3][2] = { + {0x0000000A, 0x000000ff}, + {0x000010AC, 0x0000f0ff}, + {0x0000A8C0, 0x0000ffff}, + }; + + if (ip == 0) { + return 1; + } +#pragma unroll 3 + for (i =0; i < 3; i ++) { + if ((ip & array[i][1]) == array[i][0]) { + return 1; + } + } + return 0; +} + +static inline int check_ip(struct data_t* pdata) { + return check_inner(pdata->ip_src) && check_inner(pdata->ip_dst); +} + +SEC("kprobe/tcp_enter_loss") +int j_tcp_enter_loss(struct pt_regs *ctx) +{ + struct sock *sk; + struct data_t data = {}; + u32 stat; + + sk = (struct sock *)PT_REGS_PARM1(ctx); + stat = BPF_CORE_READ(sk, __sk_common.skc_state); + if (stat != 1) { + return 0; + } + get_task(&data, sk); + addCnt(NET_RETRANS_TYPE_RTO, 1); + get_info(&data, sk, NET_RETRANS_TYPE_RTO); + data.stack_id = 0; + get_tcp_info(&data, (struct tcp_sock *)sk); + if (check_ip(&data)) { + bpf_perf_event_output(ctx, &perf, BPF_F_CURRENT_CPU, &data, sizeof(data)); + } + return 0; +} + +SEC("kprobe/tcp_send_probe0") +int j_tcp_send_probe0(struct pt_regs *ctx) +{ + struct sock *sk; + struct data_t data = {}; + u32 stat; + + sk = (struct sock *)PT_REGS_PARM1(ctx); + stat = BPF_CORE_READ(sk, __sk_common.skc_state); + if (stat == 0) { + return 0; + } + + addCnt(NET_RETRANS_TYPE_ZERO, 1); + get_info(&data, sk, NET_RETRANS_TYPE_ZERO); + data.stack_id = 0; + get_task(&data, sk); + get_tcp_info(&data, (struct tcp_sock *)sk); + + bpf_perf_event_output(ctx, &perf, BPF_F_CURRENT_CPU, &data, sizeof(data)); + return 0; +} + +SEC("kprobe/tcp_v4_send_reset") +int j_tcp_v4_send_reset(struct pt_regs *ctx) +{ + struct sock *sk; + struct data_t data = {}; + + sk = (struct sock *)PT_REGS_PARM1(ctx); + if (sk == NULL) { + struct sk_buff *skb = (struct sk_buff *)PT_REGS_PARM2(ctx); + addCnt(NET_RETRANS_TYPE_RST, 1); + get_skb_info(&data, skb, NET_RETRANS_TYPE_RST); + get_task(&data, NULL); + data.stack_id = 0; + } + else { + addCnt(NET_RETRANS_TYPE_RST_SK, 1); + get_info(&data, sk, NET_RETRANS_TYPE_RST_SK); + get_task(&data, sk); + if (data.sk_state == 10) { // for listen cath skb info. 
+ struct sk_buff *skb = (struct sk_buff *)PT_REGS_PARM2(ctx); + get_skb_info(&data, skb, NET_RETRANS_TYPE_RST_SK); + } + data.stack_id = bpf_get_stackid(ctx, &stack, KERN_STACKID_FLAGS); + } + if (check_ip(&data)) { + bpf_perf_event_output(ctx, &perf, BPF_F_CURRENT_CPU, &data, sizeof(data)); + } + return 0; +} + +SEC("kprobe/tcp_send_active_reset") +int j_tcp_send_active_reset(struct pt_regs *ctx) +{ + struct sock *sk; + struct data_t data = {}; + + addCnt(NET_RETRANS_TYPE_RST_ACTIVE, 1); + + sk = (struct sock *)PT_REGS_PARM1(ctx); + get_info(&data, sk, NET_RETRANS_TYPE_RST_ACTIVE); + data.stack_id = bpf_get_stackid(ctx, &stack, KERN_STACKID_FLAGS); + + get_task(&data, sk); + if (check_ip(&data)) { + bpf_perf_event_output(ctx, &perf, BPF_F_CURRENT_CPU, &data, sizeof(data)); + } + return 0; +} + +#define TCP_SYN_SENT 2 +#define TCPF_SYN_SENT (1 << TCP_SYN_SENT) +SEC("kprobe/tcp_retransmit_skb") +int j_tcp_retransmit_skb(struct pt_regs *ctx){ + struct sock *sk; + unsigned char stat; + + sk = (struct sock *)PT_REGS_PARM1(ctx); + + stat = BPF_CORE_READ(sk, __sk_common.skc_state); + if (stat == TCP_SYN_SENT) + { + struct data_t data = {}; + + addCnt(NET_RETRANS_TYPE_SYN, 1); + get_info(&data, sk, NET_RETRANS_TYPE_SYN); + get_task(&data, sk); + get_tcp_info(&data, (struct tcp_sock *)sk); + bpf_perf_event_output(ctx, &perf, BPF_F_CURRENT_CPU, &data, sizeof(data)); + } + return 0; +} + +SEC("kprobe/tcp_rtx_synack") +int j_tcp_rtx_synack(struct pt_regs *ctx) +{ + struct sock *sk, *sk2; + struct request_sock *req = (struct request_sock *)PT_REGS_PARM2(ctx); + struct data_t data = {}; + + addCnt(NET_RETRANS_TYPE_SYN_ACK, 1); + sk = (struct sock *)PT_REGS_PARM1(ctx); + sk2 = BPF_CORE_READ(req, sk); + get_info(&data, sk2, NET_RETRANS_TYPE_SYN_ACK); + get_task(&data, sk); + get_tcp_info(&data, (struct tcp_sock *)sk2); + bpf_perf_event_output(ctx, &perf, BPF_F_CURRENT_CPU, &data, sizeof(data)); + return 0; +} diff --git a/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.c b/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.c new file mode 100644 index 0000000000000000000000000000000000000000..84f940a3a1a11cd181ddb4bb88198f671017e320 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.c @@ -0,0 +1,219 @@ +// +// Created by 廖肇燕 on 2023/2/24. 
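/*
 * check_inner() in net_retrans.bpf.c above keeps only RFC1918 (plus
 * unspecified) addresses; the constants look byte-swapped because skc_daddr is
 * a big-endian __be32 read as a native u32 on little-endian hosts. The same
 * filter in plain user-space C for reference (is_private_ipv4 is a
 * hypothetical name):
 */
#include <arpa/inet.h>

static int is_private_ipv4(unsigned int be_ip)         /* network byte order */
{
    unsigned int ip = ntohl(be_ip);

    return ip == 0 ||
           (ip >> 24) == 10 ||                         /* 10.0.0.0/8     */
           (ip >> 20) == 0xAC1 ||                      /* 172.16.0.0/12  */
           (ip >> 16) == 0xC0A8;                       /* 192.168.0.0/16 */
}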
+// + +#include "net_retrans.h" +#define COOLBPF_PERF_THREAD +#include "../bpf_head.h" +#include "net_retrans.skel.h" + +#include +#include + +#include +#include +#include + +static volatile int budget = 0; // for log budget +static int cnt_fd = 0; +static int stack_fd = 0; + +const char *net_title[] = {"rto_retrans", "zero_probe", \ + "noport_reset", "bad_sync", \ + "net_proc", "syn_send", "syn_ack"}; + +int proc(int stack_fd, struct data_t *e, struct unity_line *line); +void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) +{ + int ret; + if (budget > 0) { + struct data_t *e = (struct data_t *)data; + struct beeQ *q = (struct beeQ *)ctx; + struct unity_line *line; + struct unity_lines *lines = unity_new_lines(); + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + ret = proc(stack_fd, e, line); + if (ret >= 0) { + beeQ_send(q, lines); + } + budget --; + } +} + +DEFINE_SEKL_OBJECT(net_retrans); +int init(void *arg) +{ + int ret; + printf("net_retrans plugin install.\n"); + + ret = LOAD_SKEL_OBJECT(net_retrans, perf); + cnt_fd = coobpf_map_find(net_retrans->obj, "outCnt"); + stack_fd = coobpf_map_find(net_retrans->obj, "stack"); + return ret; +} + +static int get_count(unsigned long *locals) { + int i = 0; + + for (i = 0; i < NET_RETRANS_TYPE_MAX; i ++) { + coobpf_key_value(cnt_fd, &i, &locals[i]); + } + return i; +} + +static int cal_retrans(unsigned long *values) { + int i; + static unsigned long rec[NET_RETRANS_TYPE_MAX] = {0}; + unsigned long locals[NET_RETRANS_TYPE_MAX]; + + get_count(locals); + for (i = 0; i < NET_RETRANS_TYPE_MAX; i ++) { + values[i] = locals[i] - rec[i]; + rec[i] = locals[i]; + } + return 0; +} + +int call(int t, struct unity_lines *lines) { + int i; + unsigned long values[NET_RETRANS_TYPE_MAX]; + struct unity_line* line; + + budget = t; //release log budget + + unity_alloc_lines(lines, 1); // 预分配好 + line = unity_get_line(lines, 0); + unity_set_table(line, "net_retrans_count"); + + cal_retrans(values); + for (i = 0; i < NET_RETRANS_TYPE_MAX; i ++) { + unity_set_value(line, i, net_title[i], values[i]); + } + + return 0; +} + +void deinit(void) +{ + printf("net_retrans plugin uninstall.\n"); + DESTORY_SKEL_BOJECT(net_retrans); +} + +#define LOG_MAX 256 +static char log[LOG_MAX]; + +static int transIP(unsigned long lip, char *result, int size) { + inet_ntop(AF_INET, (void *) &lip, result, size); + return 0; +} + +static const char * resetSock(int stack_fd, struct data_t *e){ + unsigned long addr[128]; + int i = e->stack_id; //last stack + struct ksym_cell* cell; + + coobpf_key_value(stack_fd, &i, &addr); + if (addr[1] > 0) { + cell = ksym_search(addr[1]); + if (cell) { + if (strcmp(cell->func, "tcp_v4_rcv") == 0) { + if (e->sk_state == 12) { + return "bad_ack"; // TCP_NEW_SYN_REC + } else { + return "tw_rst"; + } + } else if (strcmp(cell->func, "tcp_check_req") == 0) { + return "bad_syn"; + } else if (strcmp(cell->func, "tcp_v4_do_rcv") == 0) { + return "tcp_stat"; + } else { + printf("sym: %s\n", cell->func); + return "unknown_sock"; + } + } + } + return "failure_sock"; +} + +static const char * resetActive(int stack_fd, struct data_t *e){ + unsigned long addr; + int i = 1; //last stack + struct ksym_cell* cell; + + coobpf_key_value(stack_fd, &i, &addr); + if (addr) { + cell = ksym_search(addr); + if (cell) { + if (strcmp(cell->func, "tcp_out_of_resources") == 0) { + return "tcp_oom"; + } else if (strcmp(cell->func, "tcp_keepalive_timer") == 0) { + return "keep_alive"; + } else if (strcmp(cell->func, "inet_release") == 0) { + return 
"bad_close"; + } else if (strcmp(cell->func, "tcp_close") == 0) { + return "bad_close"; + } else if (strcmp(cell->func, "tcp_disconnect") == 0) { + return "tcp_abort"; + } else if (strcmp(cell->func, "tcp_abort") == 0) { + return "tcp_abort"; + } else { + return "unknown_active"; + } + } + } + return "failure_active"; +} + +int proc(int stack_fd, struct data_t *e, struct unity_line *line) { + char sip[32]; + char dip[32]; + + transIP(e->ip_src, sip, 32); + transIP(e->ip_dst, dip, 32); + snprintf(log, LOG_MAX, "task:%d(%s), tcp:%s:%d->%s:%d, state:%d, ", e->pid, e->comm, \ + sip, htons(e->sport), \ + dip, htons(e->dport), \ + e->sk_state); + switch (e->type) { + case NET_RETRANS_TYPE_RTO: + case NET_RETRANS_TYPE_ZERO: + case NET_RETRANS_TYPE_SYN: + case NET_RETRANS_TYPE_SYN_ACK: + { + char buf[LOG_MAX - 1]; + snprintf(buf, LOG_MAX - 1, "rcv_nxt:%u, rcv_wup:%u, snd_nxt:%u, snd_una:%u, copied_seq:%u, " + "snd_wnd:%u, rcv_wnd:%u, lost_out:%u, packets_out:%u, retrans_out:%u, " + "sacked_out:%u, reordering:%u", + e->rcv_nxt, e->rcv_wup, e->snd_nxt, e->snd_una, e->copied_seq, + e->snd_wnd, e->rcv_wnd, e->lost_out, e->packets_out, e->retrans_out, + e->sacked_out, e->reordering + ); + strncat(log, buf, LOG_MAX -1); + } + break; + case NET_RETRANS_TYPE_RST: + strncat(log, "noport", LOG_MAX - 1); + break; + case NET_RETRANS_TYPE_RST_SK: + { + const char *type = resetSock(stack_fd, e); + strncat(log, type, LOG_MAX - 1); + } + break; + case NET_RETRANS_TYPE_RST_ACTIVE: + { + const char *type = resetActive(stack_fd, e); + strncat(log, type, LOG_MAX - 1); + } + break; + default: + break; + } + unity_set_table(line, "net_retrans_log"); + unity_set_index(line, 0, "type", net_title[e->type]); + printf("%s\n", log); + unity_set_log(line, "log", log); + return 0; +} diff --git a/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.h b/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.h new file mode 100644 index 0000000000000000000000000000000000000000..5d9539240341700a6d9cde9671a8ea7851d76b92 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.h @@ -0,0 +1,48 @@ +// +// Created by 廖肇燕 on 2023/2/24. 
+// + +#ifndef UNITY_NET_RETRANS_H +#define UNITY_NET_RETRANS_H + +#define TASK_COMM_LEN 16 + +enum { + NET_RETRANS_TYPE_RTO, + NET_RETRANS_TYPE_ZERO, + NET_RETRANS_TYPE_RST, + NET_RETRANS_TYPE_RST_SK, + NET_RETRANS_TYPE_RST_ACTIVE, + NET_RETRANS_TYPE_SYN, + NET_RETRANS_TYPE_SYN_ACK, + NET_RETRANS_TYPE_MAX, +}; + + +struct data_t { + char comm[TASK_COMM_LEN]; + unsigned int pid; + unsigned int type; + unsigned int ip_src; + unsigned int ip_dst; + unsigned short sport; + unsigned short dport; + unsigned short sk_state; + unsigned short stack_id; + + unsigned int rcv_nxt; + unsigned int rcv_wup; + unsigned int snd_nxt; + unsigned int snd_una; + unsigned int copied_seq; + unsigned int snd_wnd; + unsigned int rcv_wnd; + + unsigned int lost_out; + unsigned int packets_out; + unsigned int retrans_out; + unsigned int sacked_out; + unsigned int reordering; +}; + +#endif //UNITY_NET_RETRANS_H diff --git a/source/tools/monitor/unity/collector/plugin/netlink/Makefile b/source/tools/monitor/unity/collector/plugin/netlink/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..cfe0e64946181e6062b0ed9991f6bad85b9e3d2a --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/netlink/Makefile @@ -0,0 +1,19 @@ +CC := gcc +CFLAG := -g -fpic +LDFLAG := -g -fpic -shared +OBJS := netlink.o +SO := libnetlink.so + +all: $(SO) install + +%.o: %.c + $(CC) -c $< -o $@ $(CFLAG) + +$(SO): $(OBJS) + $(CC) -o $@ $(OBJS) $(LDFLAG) + +install: $(SO) + cp $(SO) ../../native/ + +clean: + rm -f $(SO) $(OBJS) \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/plugin/netlink/netlink.c b/source/tools/monitor/unity/collector/plugin/netlink/netlink.c new file mode 100644 index 0000000000000000000000000000000000000000..4bae1997d23274c7096ea8b10843d27bd0062cfd --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/netlink/netlink.c @@ -0,0 +1,90 @@ +#include "netlink.h" +#include +#include +#include + +static char buffer[4096]; + +int get_conntrack_drop() +{ + int total_drop = 0, i; + FILE *fp = NULL; + fp = popen("conntrack -S", "r"); + + if (!fp) + return -1; + + while (fgets(buffer, 4096, fp) != NULL) + { + char *buf = buffer; + while ((buf = strstr(buf, " drop=")) != NULL) + { + buf += strlen(" drop="); + for (i = 0;; i++) + { + if (buf[i] > '9' || buf[i] < '0') + { + buf[i] = 0; + break; + } + } + total_drop += atoi(buf); + buf += i + 1; + } + } + pclose(fp); + return total_drop; +} + +int get_tc_drop() +{ + int total_drop = 0, i; + FILE *fp = NULL; + fp = popen("tc -s qdisc", "r"); + + if (!fp) + return -1; + + while (fgets(buffer, 4096, fp) != NULL) + { + char *buf = buffer; + while ((buf = strstr(buf, "dropped ")) != NULL) + { + buf += strlen("dropped "); + for (i = 0;; i++) + { + if (buf[i] > '9' || buf[i] < '0') + { + buf[i] = 0; + break; + } + } + total_drop += atoi(buf); + buf += i + 1; + } + } + pclose(fp); + return total_drop; +} + + +int init(void * arg) { + printf("netlink plugin install\n"); + return 0; +} + +int call(int t, struct unity_lines* lines) { + struct unity_line* line; + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + unity_set_table(line, "netlink"); + unity_set_value(line, 0, "conntrack_drop", get_conntrack_drop()); + unity_set_value(line, 1, "tc_drop", get_tc_drop()); + + return 0; +} + +void deinit(void) { + printf("netlink plugin uninstall\n"); +} diff --git a/source/tools/monitor/unity/collector/plugin/netlink/netlink.h b/source/tools/monitor/unity/collector/plugin/netlink/netlink.h new file mode 100644 index 
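/*
 * netlink.c above sums every "drop=" field of `conntrack -S` and every
 * "dropped " field of `tc -s qdisc` by patching NUL bytes into the buffer.
 * strtol() performs the same extraction without modifying the buffer; a sketch
 * (sum_field is a hypothetical helper):
 */
#include <stdlib.h>
#include <string.h>

static long sum_field(const char *line, const char *tag)
{
    long total = 0;
    const char *p = line;
    char *end;

    while ((p = strstr(p, tag)) != NULL) {
        total += strtol(p + strlen(tag), &end, 10);   /* number right after the tag */
        p = end;
    }
    return total;
}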
0000000000000000000000000000000000000000..4a55af11577dee646f55ce4d1807cb077609983f --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/netlink/netlink.h @@ -0,0 +1,11 @@ +#ifndef __NET_LINK_H +#define __NET_LINK_H + +#include "../plugin_head.h" + +int init(void * arg); +int call(int t, struct unity_lines* lines); +void deinit(void); + + +#endif \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/plugin/numainfo/Makefile b/source/tools/monitor/unity/collector/plugin/numainfo/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..841dca5f1c065d29ca7d780157a8be96a5960f75 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/numainfo/Makefile @@ -0,0 +1,19 @@ +CC := gcc +CFLAG := -g -fpic -lnuma +LDFLAG := -g -fpic -shared -lnuma +OBJS := numainfo.o +SO := libnumainfo.so + +all: $(SO) install + +%.o: %.c + $(CC) -c $< -o $@ $(CFLAG) + +$(SO): $(OBJS) + $(CC) -o $@ $(OBJS) $(LDFLAG) + +install: $(SO) + cp $(SO) ../../native/ + +clean: + rm -f $(SO) $(OBJS) diff --git a/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.c b/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.c new file mode 100644 index 0000000000000000000000000000000000000000..67017fdae592000b8cd5d1a42a267d1e67106d42 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.c @@ -0,0 +1,66 @@ +// +// Created by muya. +// + +#include "numainfo.h" +#include +#include + +int init(void * arg) { + printf("numainfo plugin install, proc: %s\n", get_unity_proc()); + return 0; +} + +int call(int t, struct unity_lines* lines) { + + // get numa node number + // yum install numactl-devel + int num_nodes = numa_max_node() + 1; + // num_nodes = 2; + // read from /sys/devices/system/node/node0/numastat + // printf("numa %d\n", num_nodes); + struct unity_line* line; + int i, j, ret; + FILE *fp; + char fname[128]; + + unity_alloc_lines(lines, num_nodes); // 预分配好 + + // unity_set_index(line, 0, "mode", "numa_num"); + // unity_set_value(line, 0, "numa_num_sum", num_nodes); + + for (i = 0; i < num_nodes; i++) { + char numa_name[10]; + snprintf(numa_name, 10, "%s%d", "node", i); + // printf("numa is %s\n", numa_name); + line = unity_get_line(lines, i); + unity_set_table(line, "numainfo"); + unity_set_index(line, 0, "node", numa_name); + fp = NULL; + errno = 0; + if (sprintf(fname, "%s%s%d%s", get_unity_proc(), "/sys/devices/system/node/node", i, "/numastat") < 0) + printf("sprintf error\n"); + // printf("fname is %s\n", fname); + if ((fp = fopen(fname, "r")) == NULL) { + ret = errno; + printf("WARN: numainfo install FAIL fopen\n"); + return ret; + } + for (j = 0; j < 6; j++) { + char k[32]; + unsigned long v; + errno = fscanf(fp, "%s %ld\n", k, &v); + if (errno < 0) + return errno; + // printf("k is %s\n", k); + unity_set_value(line, j, k, v); + } + if (fp) + fclose(fp); + } + return 0; +} + +void deinit(void) { + printf("sample plugin uninstall\n"); +} diff --git a/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.h b/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.h new file mode 100644 index 0000000000000000000000000000000000000000..14140fdbf526f945fbb752ac868e9facf0568224 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.h @@ -0,0 +1,14 @@ +// +// Created by muya. 
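/*
 * numainfo.c above iterates node0..numa_max_node() and forwards the six
 * "key value" pairs of each numastat file verbatim. A minimal stand-alone
 * reader for one node, matching the sysfs layout used above (read_numastat is
 * a hypothetical name):
 */
#include <stdio.h>

static int read_numastat(int node)
{
    char path[64], key[32];
    unsigned long val;
    FILE *fp;

    snprintf(path, sizeof(path), "/sys/devices/system/node/node%d/numastat", node);
    fp = fopen(path, "r");
    if (!fp)
        return -1;
    while (fscanf(fp, "%31s %lu", key, &val) == 2)
        printf("node%d %s=%lu\n", node, key, val);    /* numa_hit, numa_miss, ... */
    fclose(fp);
    return 0;
}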
+// + +#ifndef UNITY_NUMAINFO_H +#define UNITY_NUMAINFO_H + +#include "../plugin_head.h" + +int init(void * arg); +int call(int t, struct unity_lines* lines); +void deinit(void); + +#endif //UNITY_NUMAINFO_H diff --git a/source/tools/monitor/unity/collector/plugin/plugin_head.h b/source/tools/monitor/unity/collector/plugin/plugin_head.h index 202a110f9f165042b701ff84d177f55aaaeed683..849741169c66ef39a0a6cbd2ebcc46a530f34f10 100644 --- a/source/tools/monitor/unity/collector/plugin/plugin_head.h +++ b/source/tools/monitor/unity/collector/plugin/plugin_head.h @@ -5,23 +5,28 @@ #ifndef UNITY_PLUGIN_HEAD_H #define UNITY_PLUGIN_HEAD_H +#define TABLE_SIZE 32 +#define NAME_SIZE 16 +#define INDEX_SIZE 16 + + struct unity_index { - char name[16]; - char index[16]; + char name[NAME_SIZE]; + char index[INDEX_SIZE]; }; struct unity_value { - char name[16]; + char name[NAME_SIZE]; double value; }; struct unity_log { - char name[16]; + char name[NAME_SIZE]; char* log; }; struct unity_line { - char table[32]; + char table[TABLE_SIZE]; struct unity_index indexs[4]; struct unity_value values[32]; struct unity_log logs[1]; @@ -37,8 +42,9 @@ struct unity_lines { #include #include #include "../../beeQ/beeQ.h" -#include "../native/sig_stop.h" -#include "../native/unity_interface.h" +#include "../interface/sig_stop.h" +#include "../interface/unity_interface.h" +#include "../interface/fastKsym.h" inline struct unity_lines *unity_new_lines(void) __attribute__((always_inline)); inline int unity_alloc_lines(struct unity_lines * lines, unsigned int num) __attribute__((always_inline)); @@ -83,7 +89,7 @@ inline struct unity_line * unity_get_line(struct unity_lines * lines, unsigned i } inline int unity_set_table(struct unity_line * line, const char * table) { - strncpy(line->table, table, 32); + strncpy(line->table, table, TABLE_SIZE - 1); return 0; } @@ -92,8 +98,8 @@ inline int unity_set_index(struct unity_line * line, if (i >= 4) { return -ERANGE; } - strncpy(line->indexs[i].name, name, 16); - strncpy(line->indexs[i].index, index, 16); + strncpy(line->indexs[i].name, name, NAME_SIZE - 1); + strncpy(line->indexs[i].index, index, INDEX_SIZE - 1); return 0; } @@ -102,14 +108,14 @@ inline int unity_set_value(struct unity_line * line, if (i >= 32) { return -ERANGE; } - strncpy(line->values[i].name, name, 16); + strncpy(line->values[i].name, name, NAME_SIZE - 1); line->values[i].value = value; return 0; } inline int unity_set_log(struct unity_line * line, const char * name, const char * log) { - strncpy(line->logs[0].name, name, 16); + strncpy(line->logs[0].name, name, NAME_SIZE - 1); line->logs[0].log = strdup(log); return 0; } diff --git a/source/tools/monitor/unity/collector/plugin/proc_loadavg/proc_loadavg.c b/source/tools/monitor/unity/collector/plugin/proc_loadavg/proc_loadavg.c index 7494e32523b568e3b7a618958b6e3b30507ca031..e113f55f73968c76b01baaae13b955bbcf8aa00a 100644 --- a/source/tools/monitor/unity/collector/plugin/proc_loadavg/proc_loadavg.c +++ b/source/tools/monitor/unity/collector/plugin/proc_loadavg/proc_loadavg.c @@ -4,6 +4,7 @@ #include "proc_loadavg.h" #define LOADAVG_PATH "/proc/loadavg" +char *real_proc_path; struct stats_load { unsigned long nr_running; @@ -15,6 +16,14 @@ struct stats_load { int init(void * arg) { + int i, lenth; + char *mntpath = get_unity_proc(); + + lenth = strlen(mntpath)+strlen(LOADAVG_PATH); + real_proc_path = calloc(lenth+2, 1); + if (!real_proc_path) + return -errno; + snprintf(real_proc_path, lenth+1, "%s%s", mntpath, LOADAVG_PATH); printf("proc_loadavg plugin 
install.\n"); return 0; } @@ -28,7 +37,7 @@ int full_line(struct unity_line *uline) fp = NULL; errno = 0; - if ((fp = fopen(LOADAVG_PATH, "r")) == NULL) { + if ((fp = fopen(real_proc_path, "r")) == NULL) { ret = errno; printf("WARN: proc_loadavg install FAIL fopen\n"); return ret; @@ -51,9 +60,9 @@ int full_line(struct unity_line *uline) st_load.nr_running--; } //unity_set_index(uline1[cpu], 0, "load", cpu_name); - unity_set_value(uline, 0, "load1", st_load.load_avg_1*100); - unity_set_value(uline, 1, "load5", st_load.load_avg_5*100); - unity_set_value(uline, 2, "load15", st_load.load_avg_15*100); + unity_set_value(uline, 0, "load1", st_load.load_avg_1); + unity_set_value(uline, 1, "load5", st_load.load_avg_5); + unity_set_value(uline, 2, "load15", st_load.load_avg_15); unity_set_value(uline, 3, "runq", st_load.nr_running); unity_set_value(uline, 4, "plit", st_load.nr_threads); @@ -74,5 +83,7 @@ int call(int t, struct unity_lines* lines) { void deinit(void) { + if (real_proc_path) + free(real_proc_path); printf("proc_loadavg plugin uninstall\n"); } diff --git a/source/tools/monitor/unity/collector/plugin/proc_schedstat/proc_schedstat.c b/source/tools/monitor/unity/collector/plugin/proc_schedstat/proc_schedstat.c index 41850778615f534a1118f8b5d295eb9800234511..b74309c56244390dbe6b92c4789a947da1dbe22f 100644 --- a/source/tools/monitor/unity/collector/plugin/proc_schedstat/proc_schedstat.c +++ b/source/tools/monitor/unity/collector/plugin/proc_schedstat/proc_schedstat.c @@ -19,17 +19,26 @@ struct sched_stats { }; long nr_cpus; +char *real_proc_path; struct unity_line **lines1; -struct sched_stats *schstats, *schstats2, *delta, *curr, *oldp; +struct sched_stats *schstats; int init(void * arg) { int ret; + int i, lenth; + char *mntpath = get_unity_proc(); + lenth = strlen(mntpath)+strlen(SCHEDSTAT_PATH); + real_proc_path = calloc(lenth+2, 1); + if (!real_proc_path) + return -errno; + snprintf(real_proc_path, lenth+1, "%s%s", mntpath, SCHEDSTAT_PATH); + printf("path=%s\n", real_proc_path); errno = 0; lines1 = NULL; - schstats = schstats2 = delta = curr = oldp = NULL; + schstats = NULL; nr_cpus = sysconf(_SC_NPROCESSORS_CONF); if (nr_cpus < 0) { ret = errno; @@ -41,53 +50,37 @@ int init(void * arg) schstats = calloc(sizeof(struct sched_stats), nr_cpus); if (!schstats) { ret = errno; + free(real_proc_path); printf("WARN: proc_schedstat install FAIL calloc 1\n"); return ret; } - schstats2 = calloc(sizeof(struct sched_stats), nr_cpus); - if (!schstats2) { - ret = errno; - printf("WARN: proc_schedstat install FAIL calloc 2\n"); - return ret; - } - delta = calloc(sizeof(struct sched_stats), nr_cpus); - if (!delta) { - ret = errno; - printf("WARN: proc_schedstat install FAIL calloc 3\n"); - return ret; - } - curr = schstats; - oldp = schstats2; lines1 = calloc(sizeof(struct unity_line *), nr_cpus); if (!lines1) { ret = errno; + free(real_proc_path); + free(schstats); printf("WARN: proc_schedstat install FAIL calloc 4\n"); return ret; } + printf("proc_schedstat plugin install.\n"); return 0; } -static void gen_delta(struct sched_stats *curr, struct sched_stats *old, struct sched_stats *delta) -{ - delta->pcount = curr->pcount - old->pcount; - delta->delay = curr->delay - old->delay; -} - -int full_line(struct unity_line **uline1, struct unity_line *uline2) +int full_line(struct unity_line **uline1) { int n, i, ret, idx; long cpu; FILE *fp; char line[128], cpu_name[8]; - struct sched_stats *st, *tmp; + struct sched_stats st; unsigned long long value[8] = {0}; unsigned long long sum_delay, sum_cnt; fp = 
NULL; errno = 0; idx = 0; - if ((fp = fopen(SCHEDSTAT_PATH, "r")) == NULL) { + if ((fp = fopen(real_proc_path, "r")) == NULL) { ret = errno; printf("WARN: proc_schedstat install FAIL fopen\n"); return ret; @@ -98,60 +91,38 @@ int full_line(struct unity_line **uline1, struct unity_line *uline2) while (fgets(line, LINE_LEN, fp) != NULL) { if (!strncmp(line, "cpu", 3)) { n = sscanf(line+3, "%ld", &cpu); - if (n == 1 && cpu >= 0 && cpu < nr_cpus) { - st = &curr[cpu]; - } else { + if ((n != 1) || (cpu < 0) || (cpu >= nr_cpus)) { printf("WARN:sscanf/cpu fails... n=%d,cpu=%ld, nr_cpu=%ld\n", n, cpu, nr_cpus); printf("line=[%s]\n", line+3); continue; } memset(cpu_name, 0, sizeof(cpu_name)); n = sscanf(line, "%s %llu %llu %llu %llu %llu %llu %llu %llu %llu", - cpu_name, &st->yld_count, &value[0], &value[1], &value[2], + cpu_name, &st.yld_count, &value[0], &value[1], &value[2], &value[3], &value[4], &value[5], &value[6], &value[7]); if (n == 9) { - st->sched_count = value[0]; - st->sched_goidle = value[1]; - st->ttwu_count = value[2]; - st->ttwu_local = value[3]; - st->rq_cpu_time = value[4]; - st->delay = value[5]; - st->pcount = value[6]; + st.sched_count = value[0]; + st.sched_goidle = value[1]; + st.ttwu_count = value[2]; + st.ttwu_local = value[3]; + st.rq_cpu_time = value[4]; + st.delay = value[5]; + st.pcount = value[6]; } else if (n == 10) { - st->sched_count = value[1]; - st->sched_goidle = value[2]; - st->ttwu_count = value[3]; - st->ttwu_local = value[4]; - st->rq_cpu_time = value[5]; - st->delay = value[6]; - st->pcount = value[7]; + st.sched_count = value[1]; + st.sched_goidle = value[2]; + st.ttwu_count = value[3]; + st.ttwu_local = value[4]; + st.rq_cpu_time = value[5]; + st.delay = value[6]; + st.pcount = value[7]; } - gen_delta(st, &oldp[cpu], &delta[cpu]); -#if PROC_SCH_DEBUG - printf("%s: pcount=%llu, delay=%llu\n", - cpu_name, delta[cpu].pcount, delta[cpu].delay); -#endif unity_set_index(uline1[cpu], 0, "cpu", cpu_name); - unity_set_value(uline1[cpu], 0, "pcount", delta[cpu].pcount); - unity_set_value(uline1[cpu], 1, "delay", delta[cpu].delay); + unity_set_value(uline1[cpu], 0, "pcount", st.pcount); + unity_set_value(uline1[cpu], 1, "delay", st.delay); } } - /* The avg of ALL cpus */ - for (i = 0; i < nr_cpus; i++) { - sum_cnt += delta[i].pcount; - sum_delay += delta[i].delay; - } -#if PROC_SCH_DEBUG - printf("avg: pcount=%llu, delay=%llu\n", - sum_cnt/nr_cpus, sum_delay/nr_cpus); -#endif - unity_set_index(uline2, 0, "summary", "avg"); - unity_set_value(uline2, 0, "pcount", sum_cnt/nr_cpus); - unity_set_value(uline2, 1, "delay", sum_delay/nr_cpus); - tmp = curr; - curr = oldp; - oldp = tmp; if (fp) fclose(fp); } @@ -159,28 +130,23 @@ int full_line(struct unity_line **uline1, struct unity_line *uline2) int call(int t, struct unity_lines* lines) { int i = 0; static double value = 0.0; - struct unity_line* line2; - unity_alloc_lines(lines, nr_cpus+1); + unity_alloc_lines(lines, nr_cpus); for (i = 0; i < nr_cpus; i++) { lines1[i] = unity_get_line(lines, i); unity_set_table(lines1[i], "proc_schedstat"); } - line2 = unity_get_line(lines, nr_cpus); - unity_set_table(line2, "proc_schedstat"); - full_line(lines1, line2); + full_line(lines1); return 0; } void deinit(void) { - if (schstats) - free(schstats); - if (schstats2) - free(schstats2); - if (delta) - free(delta); if (lines1) free(lines1); + if (schstats) + free(schstats); + if (real_proc_path) + free(real_proc_path); printf("proc_schedstat plugin uninstall\n"); } diff --git 
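/*
 * Both proc_loadavg.c and proc_schedstat.c above now build their file path in
 * init() by prefixing get_unity_proc() (the configurable proc mount point,
 * useful when running inside a container) onto the kernel path, and free it in
 * deinit(). The helper pattern in isolation (make_proc_path is a hypothetical
 * name):
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *make_proc_path(const char *mnt, const char *suffix)
{
    size_t len = strlen(mnt) + strlen(suffix) + 1;
    char *path = malloc(len);

    if (!path)
        return NULL;
    snprintf(path, len, "%s%s", mnt, suffix);   /* e.g. "/mnt/host" + "/proc/loadavg" */
    return path;                                /* caller frees in deinit() */
}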
a/source/tools/monitor/unity/collector/plugin/proto_sender.c b/source/tools/monitor/unity/collector/plugin/proto_sender.c index bdf1397eb270a7dc82fc4d87068787e3585245de..552e6068910b5ed9e65b005311bba83898a44a67 100644 --- a/source/tools/monitor/unity/collector/plugin/proto_sender.c +++ b/source/tools/monitor/unity/collector/plugin/proto_sender.c @@ -10,23 +10,19 @@ #define PROTO_QUEUE_SIZE 64 #define gettidv1() syscall(__NR_gettid) -LUALIB_API void luaL_traceback(lua_State *L, lua_State *L1, const char *msg, int level); +extern int lua_reg_errFunc(lua_State *L); +extern int lua_check_ret(int ret); +int lua_load_do_file(lua_State *L, const char* path); -static void report_lua_failed(lua_State *L) { - fprintf(stderr, "\nFATAL ERROR:%s\n\n", lua_tostring(L, -1)); -} - -static int call_init(lua_State *L, struct beeQ* pushQ) { +static int call_init(lua_State *L, int err_func, struct beeQ* pushQ) { int ret; lua_Number lret; lua_getglobal(L, "init"); lua_pushlightuserdata(L, pushQ); lua_pushinteger(L, (int)gettidv1()); - ret = lua_pcall(L, 2, 1, 0); + ret = lua_pcall(L, 2, 1, err_func); if (ret) { - perror("proto_sender lua init func error"); - report_lua_failed(L); goto endCall; } @@ -52,6 +48,7 @@ static int call_init(lua_State *L, struct beeQ* pushQ) { extern int collector_qout(lua_State *L); lua_State * proto_sender_lua(struct beeQ* pushQ) { int ret; + int err_func; /* create a state and load standard library. */ lua_State *L = luaL_newstate(); @@ -61,20 +58,15 @@ lua_State * proto_sender_lua(struct beeQ* pushQ) { } /* opens all standard Lua libraries into the given state. */ luaL_openlibs(L); + err_func = lua_reg_errFunc(L); - ret = luaL_dofile(L, "proto_send.lua"); + ret = lua_load_do_file(L, "../beeQ/proto_send.lua"); if (ret) { - const char *msg = lua_tostring(L, -1); - perror("luaL_dofile error"); - if (msg) { - luaL_traceback(L, L, msg, 0); - fprintf(stderr, "FATAL ERROR:%s\n\n", msg); - } goto endLoad; } lua_register(L, "collector_qout", collector_qout); - ret = call_init(L, pushQ); + ret = call_init(L, err_func, pushQ); if (ret < 0) { goto endCall; } @@ -88,13 +80,13 @@ lua_State * proto_sender_lua(struct beeQ* pushQ) { struct beeQ* proto_que(lua_State *L) { int ret; + int err_func = lua_gettop(L); struct beeQ* que; lua_getglobal(L, "que"); - ret = lua_pcall(L, 0, 1, 0); + ret = lua_pcall(L, 0, 1, err_func); if (ret) { - perror("proto_que lua que func error"); - report_lua_failed(L); + lua_check_ret(ret); goto endCall; } if (!lua_isuserdata(L, -1)) { // check @@ -120,6 +112,7 @@ struct beeQ* proto_que(lua_State *L) { extern volatile int sighup_counter; int proto_send_proc(void* msg, struct beeQ* q) { int ret = 0; + int err_func; struct unity_lines *lines = (struct unity_lines *)msg; int num = lines->num; struct unity_line * pLine = lines->line; @@ -139,14 +132,14 @@ int proto_send_proc(void* msg, struct beeQ* q) { q->qarg = L; counter = sighup_counter; } + err_func = lua_gettop(L); lua_getglobal(L, "send"); lua_pushnumber(L, num); lua_pushlightuserdata(L, pLine); - ret = lua_pcall(L, 2, 1, 0); + ret = lua_pcall(L, 2, 1, err_func); if (ret) { - perror("lua call error"); - report_lua_failed(L); + lua_check_ret(ret); goto endCall; } diff --git a/source/tools/monitor/unity/collector/plugin/threads/sample_threads.c b/source/tools/monitor/unity/collector/plugin/threads/sample_threads.c index ceee07584f0de034498176f082e803bb28cffee1..169e5062fb04c8a5c371ed77a42b60cc7f86e5c0 100644 --- a/source/tools/monitor/unity/collector/plugin/threads/sample_threads.c +++ 
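/*
 * proto_sender.c above registers a Lua message handler once (lua_reg_errFunc,
 * defined elsewhere in unity) and passes its stack index as the msgh argument
 * of every lua_pcall(), instead of printing lua_tostring() ad hoc. Assuming
 * the handler is essentially debug.traceback, the pattern looks like this
 * sketch (push_traceback/call_protected are hypothetical names):
 */
#include <stdio.h>
#include <lua.h>

static int push_traceback(lua_State *L)
{
    lua_getglobal(L, "debug");
    lua_getfield(L, -1, "traceback");
    lua_remove(L, -2);               /* keep only debug.traceback on the stack */
    return lua_gettop(L);            /* index to reuse as lua_pcall()'s msgh   */
}

static int call_protected(lua_State *L, int err_func, const char *fn)
{
    lua_getglobal(L, fn);
    if (lua_pcall(L, 0, 0, err_func)) {
        fprintf(stderr, "%s\n", lua_tostring(L, -1)); /* handler-formatted traceback */
        lua_pop(L, 1);
        return -1;
    }
    return 0;
}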
b/source/tools/monitor/unity/collector/plugin/threads/sample_threads.c @@ -17,6 +17,7 @@ int init(void * arg) { } static int sample_thread_func(struct beeQ* q, void * arg) { + unsigned int ret; while (plugin_is_working()) { static double value = 1.0; struct unity_line* line; @@ -29,7 +30,10 @@ static int sample_thread_func(struct beeQ* q, void * arg) { unity_set_value(line, 1, "value2", 2.0 + value); unity_set_log(line, "log", "hello world."); beeQ_send(q, lines); - sleep(1); + ret = sleep(5); + if (ret > 0) { // interrupt by signal + break; + } } return 0; } diff --git a/source/tools/monitor/unity/collector/plugin/unity_irqoff/Makefile b/source/tools/monitor/unity/collector/plugin/unity_irqoff/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..9e86c3592f7ac7974a5c8a80ebc4267e638f9278 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/unity_irqoff/Makefile @@ -0,0 +1,8 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := unity_irqoff.bpf.c +csrcs := unity_irqoff.c +so := libunity_irqoff.so + +include ../bpfso.mk diff --git a/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.bpf.c b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..136445aae6cd5bd1c94918cee53c38c98cda2a1b --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.bpf.c @@ -0,0 +1,151 @@ +#include +#include +#include "sched_jit.h" +#include "unity_irqoff.h" + +#define PERF_MAX_STACK_DEPTH 127 +#define MAX_ENTRIES 10240 +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) + +BPF_PERF_OUTPUT(perf, 1024); + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(u32), + .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), + .max_entries = 10000, +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct arg_info); +} arg_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct tm_info); +} tm_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} events SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, u64); + __type(value, struct info); +} info_map SEC(".maps"); + +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) + +static inline u64 get_thresh(void) +{ + u64 thresh, i = 0; + struct arg_info *argp; + + argp = bpf_map_lookup_elem(&arg_map, &i); + if (argp) + thresh = argp->thresh; + else + thresh = -1; + + return thresh; +} + +SEC("perf_event") +int hw_irqoff_event(struct bpf_perf_event_data *ctx) +{ + int i = 0; + u64 now, delta, thresh, stamp; + struct tm_info *tmifp; + struct event event = {}; + u32 cpu = bpf_get_smp_processor_id(); + + now = bpf_ktime_get_ns(); + tmifp = bpf_map_lookup_elem(&tm_map, &i); + + if (tmifp) { + stamp = tmifp->last_stamp; + thresh = get_thresh(); + if (stamp && (thresh != -1)) { + delta = now - stamp; + if (delta > thresh) { + event.cpu = cpu; + event.stamp = now; + event.delay = delta; + event.pid = bpf_get_current_pid_tgid(); + bpf_get_current_comm(&event.comm, sizeof(event.comm)); + event.ret = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS); + bpf_perf_event_output(ctx, &events, 
BPF_F_CURRENT_CPU, + &event, sizeof(event)); + } + } + } + + return 0; +} + +SEC("perf_event") +int sw_irqoff_event1(struct bpf_perf_event_data *ctx) +{ + int ret, i = 0; + struct tm_info *tmifp, tm; + + tmifp = bpf_map_lookup_elem(&tm_map, &i); + if (tmifp) { + tmifp->last_stamp = bpf_ktime_get_ns(); + } else { + __builtin_memset(&tm, 0, sizeof(tm)); + tm.last_stamp = bpf_ktime_get_ns(); + bpf_map_update_elem(&tm_map, &i, &tm, 0); + } + return 0; +} + +SEC("perf_event") +int sw_irqoff_event2(struct bpf_perf_event_data *ctx) +{ + int i = 0; + u64 now, delta, thresh, stamp; + struct tm_info *tmifp, tm; + struct event event = {}; + u32 cpu = bpf_get_smp_processor_id(); + + now = bpf_ktime_get_ns(); + tmifp = bpf_map_lookup_elem(&tm_map, &i); + + if (tmifp) { + stamp = tmifp->last_stamp; + tmifp->last_stamp = now; + thresh = get_thresh(); + if (stamp && (thresh != -1)) { + delta = now - stamp; + if (delta > thresh) { + event.cpu = cpu; + event.delay = delta; + event.stamp = now; + event.pid = bpf_get_current_pid_tgid(); + bpf_get_current_comm(&event.comm, sizeof(event.comm)); + event.ret = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS); + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, + &event, sizeof(event)); + } + } + } else { + __builtin_memset(&tm, 0, sizeof(tm)); + tm.last_stamp = now; + bpf_map_update_elem(&tm_map, &i, &tm, 0); + } + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.c b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.c new file mode 100644 index 0000000000000000000000000000000000000000..fd443057454819f5340aa8dd34223cc7104979d6 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.c @@ -0,0 +1,233 @@ +#include +#include +#include +#include +//#include +//#include +//#include +#include +#include +#include +#include +#include +#include +#include "unity_irqoff.h" +#include "sched_jit.h" +#include "unity_irqoff.skel.h" +#include "../../../../unity/beeQ/beeQ.h" + +struct env { + __u64 sample_period; + __u64 threshold; +} env = { + .threshold = 50*1000*1000, /* 10ms */ +}; + +static int nr_cpus; +struct sched_jit_summary summary, prev; +struct bpf_link **sw_mlinks, **hw_mlinks= NULL; + +DEFINE_SEKL_OBJECT(unity_irqoff); + +static int +open_and_attach_perf_event(struct perf_event_attr *attr, + struct bpf_program *prog, + struct bpf_link *links[]) +{ + int i, fd; + + for (i = 0; i < nr_cpus; i++) { + fd = syscall(__NR_perf_event_open, attr, -1, i, -1, 0); + if (fd < 0) { + /* Ignore CPU that is offline */ + if (errno == ENODEV) + continue; + fprintf(stderr, "failed to init perf sampling: %s\n", + strerror(errno)); + return -1; + } + links[i] = bpf_program__attach_perf_event(prog, fd); + if (!links[i]) { + fprintf(stderr, "failed to attach perf event on cpu: %d\n", i); + close(fd); + return -1; + } + } + return 0; +} + +/* surprise! return 0 if failed! 
*/ +static int attach_prog_to_perf(struct unity_irqoff_bpf *obj) +{ + int ret = 0; + + struct perf_event_attr attr_hw = { + .type = PERF_TYPE_HARDWARE, + .freq = 0, + .sample_period = env.sample_period*2, /* refer to watchdog_update_hrtimer_threshold() */ + .config = PERF_COUNT_HW_CPU_CYCLES, + }; + + struct perf_event_attr attr_sw = { + .type = PERF_TYPE_SOFTWARE, + .freq = 0, + .sample_period = env.sample_period, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + + if (!open_and_attach_perf_event(&attr_hw, obj->progs.hw_irqoff_event, hw_mlinks)) { + ret = 1<progs.sw_irqoff_event1, sw_mlinks)) + ret = ret | 1<progs.sw_irqoff_event2, sw_mlinks)) + ret = 1<num++; + summary->total += e->delay; + + if (e->delay < 10) { + summary->less10ms++; + } else if (e->delay < 50) { + summary->less50ms++; + } else if (e->delay < 100) { + summary->less100ms++; + } else if (e->delay < 500) { + summary->less500ms++; + } else if (e->delay < 1000) { + summary->less1s++; + } else { + summary->plus1s++; + } +} + +void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) +{ + struct event e; + e = *((struct event *)data); + e.delay = e.delay/(1000*1000); + if (e.cpu > nr_cpus - 1) + return; + update_summary(&summary, &e); +} + +static int irqoff_handler(void *arg, struct unity_irqoff_bpf *unity_irqoff) +{ + int arg_key = 0, err = 0; + struct arg_info arg_info = {}; + int arg_fd; + + /*memset(summary, 0, sizeof(struct sched_jit_summary)); */ + struct perf_thread_arguments *perf_args = + calloc(sizeof(struct perf_thread_arguments), 1); + if (!perf_args) { + printf("Failed to malloc perf_thread_arguments\n"); + DESTORY_SKEL_BOJECT(unity_irqoff); + return -ENOMEM; + } + perf_args->mapfd = bpf_map__fd(unity_irqoff->maps.events); + perf_args->sample_cb = handle_event; + perf_args->lost_cb = handle_lost_events; + perf_args->ctx = arg; + perf_thread = beeQ_send_thread(arg, perf_args, thread_worker); + + arg_fd = bpf_map__fd(unity_irqoff->maps.arg_map); + arg_info.thresh = env.threshold; + env.sample_period = env.threshold*2/5; + err = bpf_map_update_elem(arg_fd, &arg_key, &arg_info, 0); + if (err) { + fprintf(stderr, "Failed to update arg_map\n"); + return err; + } + + if (!(err = attach_prog_to_perf(unity_irqoff))) + return err; + return 0; +} + +static void bump_memlock_rlimit1(void) +{ + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) { + fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n"); + exit(1); + } +} + +int init(void *arg) +{ + int err; + + nr_cpus = libbpf_num_possible_cpus(); + if (nr_cpus < 0) { + fprintf(stderr, "failed to get # of possible cpus: '%s'!\n", + strerror(-nr_cpus)); + return nr_cpus; + } + + bump_memlock_rlimit1(); + + sw_mlinks = calloc(nr_cpus, sizeof(*sw_mlinks)); + if (!sw_mlinks) { + err = errno; + fprintf(stderr, "failed to alloc sw_mlinks or rlinks\n"); + return err; + } + + hw_mlinks = calloc(nr_cpus, sizeof(*hw_mlinks)); + if (!hw_mlinks) { + err = errno; + fprintf(stderr, "failed to alloc hw_mlinks or rlinks\n"); + free(sw_mlinks); + return err; + } + + unity_irqoff = unity_irqoff_bpf__open_and_load(); + if (!unity_irqoff) { + err = errno; + fprintf(stderr, "failed to open and/or load BPF object\n"); + return err; + } + + irqoff_handler(arg, unity_irqoff); + + return 0; +} +#define delta(sum, value) \ + sum.value - prev.value +int call(int t, struct unity_lines *lines) +{ + struct unity_line *line; + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + 
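+	/*
+	 * call() runs on each collection cycle: for every bucket it exports the
+	 * change since the previous cycle, then saves the current summary in
+	 * prev.  Note the bucket naming: update_summary() counts delays of
+	 * [50,100)ms as less100ms, [100,500)ms as less500ms, and so on, and
+	 * those fields are exported under their lower-bound names gt50ms,
+	 * gt100ms, gt500ms (same convention as the unity_nosched plugin).
+	 */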
unity_set_table(line, "sched_moni_jitter"); + unity_set_index(line, 0, "mod", "irqoff"); + unity_set_value(line, 0, "dltnum", delta(summary, num)); + unity_set_value(line, 1, "dlttm", delta(summary, total)); + unity_set_value(line, 2, "gt50ms", delta(summary, less100ms)); + unity_set_value(line, 3, "gt100ms", delta(summary, less500ms)); + unity_set_value(line, 4, "gt500ms", delta(summary, less1s)); + unity_set_value(line, 5, "gt1s", delta(summary, plus1s)); + prev = summary; + return 0; +} + +void deinit(void) +{ + free(sw_mlinks); + free(hw_mlinks); + printf("unity_irqoff plugin uninstall.\n"); + DESTORY_SKEL_BOJECT(unity_irqoff); +} diff --git a/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.h b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.h new file mode 100644 index 0000000000000000000000000000000000000000..1b024f9e2f25857dfb18d0e5fe282add85e9604c --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.h @@ -0,0 +1,48 @@ +#ifndef __IRQOFF_H +#define __IRQOFF_H + +#define TASK_COMM_LEN 16 +#define CPU_ARRY_LEN 4 +#define CONID_LEN 13 + +struct info { + __u64 prev_counter; +}; + +struct tm_info { + __u64 last_stamp; +}; + +struct arg_info { + __u64 thresh; +}; + +#ifndef __VMLINUX_H__ + +#include "../plugin_head.h" + +#define DEFINE_SEKL_OBJECT(skel_name) \ + struct skel_name##_bpf *skel_name = NULL; \ + static pthread_t perf_thread = 0; \ + int thread_worker(struct beeQ *q, void *arg) \ + { \ + perf_thread_worker(arg); \ + return 0; \ + } \ + void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) \ + { \ + printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); \ + } + +#define DESTORY_SKEL_BOJECT(skel_name) \ + if (perf_thread > 0) \ + kill_perf_thread(perf_thread); \ + skel_name##_bpf__destroy(skel_name); + +int init(void *arg); +int call(int t, struct unity_lines *lines); +void deinit(void); + +#endif +#endif /* __IRQOFF_H */ + diff --git a/source/tools/monitor/unity/collector/plugin/unity_nosched/Makefile b/source/tools/monitor/unity/collector/plugin/unity_nosched/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..3f88651ec50565cb95045caa9aeaeff274458803 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/unity_nosched/Makefile @@ -0,0 +1,8 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := unity_nosched.bpf.c +csrcs := unity_nosched.c +so := libunity_nosched.so + +include ../bpfso.mk diff --git a/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.bpf.c b/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..850111668bbfb4e91dd34c2831163e259989c2eb --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.bpf.c @@ -0,0 +1,206 @@ +#include +#include +#include "sched_jit.h" +#include "unity_nosched.h" + +BPF_PERF_OUTPUT(perf, 1024); + +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) + +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_PER_LONG 64 +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) + +struct bpf_map_def SEC("maps") args_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct args), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(u32), + .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), + 
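+	/* each slot holds one kernel stack of up to PERF_MAX_STACK_DEPTH (32) u64 frames */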
.max_entries = 1000, +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, MAX_MONI_NR); + __type(key, u64); + __type(value, struct latinfo); +} info_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} events SEC(".maps"); + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +static inline int test_ti_thread_flag(struct thread_info *ti, int nr) +{ + int result; + unsigned long *addr; + unsigned long tmp = _(ti->flags); + + addr = &tmp; + result = 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); + return result; +} + +static inline int test_tsk_thread_flag_low(struct task_struct *tsk, int flag) +{ + struct thread_info *tfp; + + tfp = (struct thread_info *)(BPF_CORE_READ(tsk, stack)); + return test_ti_thread_flag(tfp, flag); +} + +/* + * Note: This is based on + * 1) ->thread_info is always be the first element of task_struct if CONFIG_THREAD_INFO_IN_TASK=y + * 2) ->state now is the most nearly begin of task_struct except ->thread_info if it has. + * return ture if struct thread_info is in task_struct */ +static bool test_THREAD_INFO_IN_TASK(struct task_struct *p) +{ + volatile long *pstate; + size_t len; + + pstate = &(p->state); + + len = (u64)pstate - (u64)p; + return (len == sizeof(struct thread_info)); +} + +static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) +{ + struct thread_info *tfp; + + tfp = (struct thread_info *)tsk; + return test_ti_thread_flag(tfp, flag); +} + +static inline int test_tsk_need_resched(struct task_struct *tsk, int flag) +{ + if (test_THREAD_INFO_IN_TASK(tsk)) + return test_tsk_thread_flag(tsk, flag); + else + return test_tsk_thread_flag_low(tsk, flag); +} + +SEC("kprobe/account_process_tick") +int BPF_KPROBE(account_process_tick, struct task_struct *p, int user_tick) +{ + int args_key; + u64 cpuid; + u64 resched_latency, now; + struct latinfo lati, *latp; + struct args args, *argsp; + + __builtin_memset(&args_key, 0, sizeof(int)); + argsp = bpf_map_lookup_elem(&args_map, &args_key); + if (!argsp) + return 0; + + if (_(p->pid) == 0) + return 0; + + if(!test_tsk_need_resched(p, _(argsp->flag))) + return 0; + + now = bpf_ktime_get_ns(); + __builtin_memset(&cpuid, 0, sizeof(u64)); + cpuid = bpf_get_smp_processor_id(); + latp = bpf_map_lookup_elem(&info_map, &cpuid); + if (latp) { + if (!latp->last_seen_need_resched_ns) { + latp->last_seen_need_resched_ns = now; + latp->ticks_without_resched = 0; + latp->last_perf_event = now; + } else { + latp->ticks_without_resched++; + resched_latency = now - latp->last_perf_event; + if (resched_latency > _(argsp->thresh)) { + struct event event = {0}; + event.stamp = latp->last_seen_need_resched_ns; + event.cpu = cpuid; + event.delay = now - latp->last_seen_need_resched_ns; + event.pid = bpf_get_current_pid_tgid(); + bpf_get_current_comm(&event.comm, sizeof(event.comm)); + event.ret = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS); + latp->last_perf_event = now; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, + &event, sizeof(event)); + } + } + } else { + __builtin_memset(&lati, 0, sizeof(struct latinfo)); + lati.last_seen_need_resched_ns = now; + lati.last_perf_event = now; + bpf_map_update_elem(&info_map, &cpuid, &lati, BPF_ANY); + } + + return 0; +} + +/* +struct trace_event_raw_sched_switch { + struct trace_entry ent; + 
char prev_comm[16]; + pid_t prev_pid; + int prev_prio; + long int prev_state; + char next_comm[16]; + pid_t next_pid; + int next_prio; + char __data[0]; +}; + */ +SEC("tp/sched/sched_switch") +int handle_switch(struct trace_event_raw_sched_switch *ctx) +{ + int args_key; + u64 cpuid; + struct latinfo lati, *latp; + struct args *argp; + + __builtin_memset(&args_key, 0, sizeof(int)); + argp = bpf_map_lookup_elem(&args_map, &args_key); + if (!argp) + return 0; + + cpuid = bpf_get_smp_processor_id(); + latp = bpf_map_lookup_elem(&info_map, &cpuid); + if (latp) { + u64 now; + struct event event = {0}; + + now = bpf_ktime_get_ns(); + event.enter = latp->last_seen_need_resched_ns; + if (argp->thresh && event.enter && + (now - event.enter > argp->thresh)) { + event.stamp = now; + event.exit = now; + event.cpu = cpuid; + event.delay = now - latp->last_seen_need_resched_ns; + latp->last_perf_event = now; + event.pid = bpf_get_current_pid_tgid(); + bpf_get_current_comm(&event.comm, sizeof(event.comm)); + event.ret = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS); + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, + &event, sizeof(event)); + } + latp->last_seen_need_resched_ns = 0; + } + + return 0; +} diff --git a/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.c b/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.c new file mode 100644 index 0000000000000000000000000000000000000000..e8915d72271b45b72ea05865e29c83e012f368b7 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.c @@ -0,0 +1,156 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "unity_nosched.h" +#include "sched_jit.h" +#include "unity_nosched.skel.h" +#include "../../../../unity/beeQ/beeQ.h" + +#ifdef __x86_64__ +#define TIF_NEED_RESCHED 3 +#elif defined (__aarch64__) +#define TIF_NEED_RESCHED 1 +#endif + +unsigned int nr_cpus; +struct sched_jit_summary summary, prev; + +static void update_summary(struct sched_jit_summary* summary, const struct event *e) +{ + summary->num++; + summary->total += e->delay; + + if (e->delay < 10) { + summary->less10ms++; + } else if (e->delay < 50) { + summary->less50ms++; + } else if (e->delay < 100) { + summary->less100ms++; /* gt50 */ + } else if (e->delay < 500) { + summary->less500ms++; /* gt100 */ + } else if (e->delay < 1000) { + summary->less1s++; /* gt500 */ + } else { + summary->plus1s++; /* gt1s */ + } +} + +void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) +{ + struct event e; + struct event *ev = (struct event *)data; + + e = *ev; + e.delay = e.delay/(1000*1000); + if (e.cpu > nr_cpus - 1) + return; + if (e.exit != 0) + update_summary(&summary, &e); +} + + +DEFINE_SEKL_OBJECT(unity_nosched); + +static void bump_memlock_rlimit1(void) +{ + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) { + fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n"); + exit(1); + } +} + +int init(void *arg) +{ + int err, argfd, args_key; + struct args args; + + bump_memlock_rlimit1(); + unity_nosched = unity_nosched_bpf__open(); + if (!unity_nosched) { + err = errno; + printf("failed to open BPF object\n"); + return -err; + } + + err = unity_nosched_bpf__load(unity_nosched); + if (err) { + fprintf(stderr, "Failed to load BPF skeleton\n"); + DESTORY_SKEL_BOJECT(unity_nosched); + return -err; + } + + argfd = bpf_map__fd(unity_nosched->maps.args_map); + args_key = 0; + 
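+	/*
+	 * args_map has a single entry (key 0) shared by the
+	 * account_process_tick kprobe and the sched_switch tracepoint:
+	 * .flag is the arch-specific TIF_NEED_RESCHED bit to test and
+	 * .thresh is the reporting threshold in nanoseconds.
+	 */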
args.flag = TIF_NEED_RESCHED; + args.thresh = 50*1000*1000; /* 50ms */ + + err = bpf_map_update_elem(argfd, &args_key, &args, 0); + if (err) { + fprintf(stderr, "Failed to update flag map\n"); + DESTORY_SKEL_BOJECT(unity_nosched); + return err; + } + + nr_cpus = libbpf_num_possible_cpus(); + memset(&summary, 0, sizeof(summary)); + { + struct perf_thread_arguments *perf_args = + malloc(sizeof(struct perf_thread_arguments)); + if (!perf_args) { + printf("Failed to malloc perf_thread_arguments\n"); + DESTORY_SKEL_BOJECT(unity_nosched); + return -ENOMEM; + } + memset(perf_args, 0, sizeof(struct perf_thread_arguments)); + perf_args->mapfd = bpf_map__fd(unity_nosched->maps.events); + perf_args->sample_cb = handle_event; + perf_args->lost_cb = handle_lost_events; + perf_args->ctx = arg; + perf_thread = beeQ_send_thread(arg, perf_args, thread_worker); + } + err = unity_nosched_bpf__attach(unity_nosched); + if (err) { + printf("failed to attach BPF programs: %s\n", strerror(err)); + DESTORY_SKEL_BOJECT(unity_nosched); + return err; + } + + printf("unity_nosched plugin install.\n"); + return 0; +} + +#define delta(sum, value) \ + sum.value - prev.value +int call(int t, struct unity_lines *lines) +{ + struct unity_line *line; + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + unity_set_table(line, "sched_moni_jitter"); + unity_set_index(line, 0, "mod", "noschd"); + unity_set_value(line, 0, "dltnum", delta(summary,num)); + unity_set_value(line, 1, "dlttm", delta(summary,total)); + unity_set_value(line, 2, "gt50ms", delta(summary,less100ms)); + unity_set_value(line, 3, "gt100ms", delta(summary,less500ms)); + unity_set_value(line, 4, "gt500ms", delta(summary,less1s)); + unity_set_value(line, 5, "gt1s", delta(summary,plus1s)); + prev = summary; + return 0; +} + +void deinit(void) +{ + printf("unity_nosched plugin uninstall.\n"); + DESTORY_SKEL_BOJECT(unity_nosched); +} diff --git a/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.h b/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.h new file mode 100644 index 0000000000000000000000000000000000000000..f7280f4bf2088230af297d819002d13cb238924b --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.h @@ -0,0 +1,92 @@ + + +#ifndef BPF_SAMPLE_H +#define BPF_SAMPLE_H + +#define MAX_MONI_NR 1024 + +#define PERF_MAX_STACK_DEPTH 32 +struct args { + int flag; + unsigned long long thresh; +}; + +struct latinfo { + unsigned long long last_seen_need_resched_ns; + unsigned long long last_perf_event; + int ticks_without_resched; +}; + +#ifndef __VMLINUX_H__ + +#include "../plugin_head.h" + +#define DEFINE_SEKL_OBJECT(skel_name) \ + struct skel_name##_bpf *skel_name = NULL; \ + static pthread_t perf_thread = 0; \ + int thread_worker(struct beeQ *q, void *arg) \ + { \ + perf_thread_worker(arg); \ + return 0; \ + } \ + void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) \ + { \ + printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); \ + } + +#define LOAD_SKEL_OBJECT(skel_name, perf) \ + ( \ + { \ + __label__ load_bpf_skel_out; \ + int __ret = 0; \ + skel_name = skel_name##_bpf__open(); \ + if (!skel_name) \ + { \ + printf("failed to open BPF object\n"); \ + __ret = -1; \ + goto load_bpf_skel_out; \ + } \ + __ret = skel_name##_bpf__load(skel_name); \ + if (__ret) \ + { \ + printf("failed to load BPF object: %d\n", __ret); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + __ret = skel_name##_bpf__attach(skel_name); \ + if (__ret) \ + { \ + 
printf("failed to attach BPF programs: %s\n", strerror(-__ret)); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + struct perf_thread_arguments *perf_args = malloc(sizeof(struct perf_thread_arguments)); \ + if (!perf_args) \ + { \ + __ret = -ENOMEM; \ + printf("failed to allocate memory: %s\n", strerror(-__ret)); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + memset(perf_args, 0, sizeof(struct perf_thread_arguments)); \ + perf_args->mapfd = bpf_map__fd(skel_name->maps.perf); \ + perf_args->sample_cb = handle_event; \ + perf_args->lost_cb = handle_lost_events; \ + perf_args->ctx = arg; \ + perf_thread = beeQ_send_thread(arg, perf_args, thread_worker); \ + load_bpf_skel_out: \ + __ret; \ + }) + +#define DESTORY_SKEL_BOJECT(skel_name) \ + if (perf_thread > 0) \ + kill_perf_thread(perf_thread); \ + skel_name##_bpf__destroy(skel_name); + +int init(void *arg); +int call(int t, struct unity_lines *lines); +void deinit(void); + +#endif + +#endif diff --git a/source/tools/monitor/unity/collector/pod_allocpage.lua b/source/tools/monitor/unity/collector/pod_allocpage.lua new file mode 100644 index 0000000000000000000000000000000000000000..23a2f4defc854729dc982aa8e03a8800b506de3e --- /dev/null +++ b/source/tools/monitor/unity/collector/pod_allocpage.lua @@ -0,0 +1,190 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liuxinwnei. +--- DateTime: 2023/02/08 17:00 PM +--- + +require("common.class") +local fcntl = require("posix.fcntl") +local unistd = require("posix.unistd") +local dirent = require("posix.dirent") +local stdlib = require("posix.stdlib") +local stat = require("posix.sys.stat") +local cjson = require("cjson") +local json = cjson.new() +local CkvProc = require("collector.kvProc") +local CvProc = require("collector.vproc") +local pystring = require("common.pystring") +local dockerinfo = require("common.dockerinfo") + +local CPodAlloc = class("podalloc", CkvProc) + +function CPodAlloc:_init_(proto, pffi, mnt, pFile) + CkvProc._init_(self, proto, pffi, mnt, pFile , "pod_alloc") + self._ffi = require("collector.native.plugincffi") + self.proc_fs, self.sys_fs, self.pods_fs, self.root_fs = dockerinfo:get_hostfs() + self.name_space = {} + self.pod_mem = {} + self.total = 0 +end + +function CPodAlloc:file_exists(file) + local f=stat.lstat(file) + if f ~= nil then + return true + else + return false + end +end + +function CPodAlloc:switch_ns(pid) + local pid_ns = self.proc_fs .. pid .. 
"/ns/net" + if not self:file_exists(pid_ns) then return end + + local f = fcntl.open(pid_ns,fcntl.O_RDONLY) + self._ffi.C.setns(f,0) + unistd.close(f) +end + +function CPodAlloc:get_container_info(did) + local res = "unknow" + local podname = did + local podns = did + local cname = did + + res = dockerinfo:get_inspect(did) + local restable = json.decode(res) + if #restable > 0 then + restable = restable[1] + end + if restable['Config'] then + local config = restable['Config'] + if config['Labels'] then + local label = config['Labels'] + if label['io.kubernetes.pod.name'] then + podname = label['io.kubernetes.pod.name'] + end + if label['io.kubernetes.container.name'] then + cname = label['io.kubernetes.container.name'] + end + if label['io.kubernetes.pod.namespace'] then + podns = label['io.kubernetes.pod.namespace'] + end + end + if podname == did and restable['Name'] then + cname = restable['Name'] + podname = restable['Name'] + end + elseif restable['status'] then + podname = restable['status']['labels']['io.kubernetes.pod.name'] + cname = restable['status']['labels']['io.kubernetes.container.name'] + podns = restable['status']['labels']['io.kubernetes.pod.namespace'] + end + if pystring:startswith(podname,"/") then podname=string.sub(podname,2,-1) end + if not self.pod_mem[podname] then + self.pod_mem[podname] = {} + self.pod_mem[podname]["allocpage"] = 0 + self.pod_mem[podname]["podns"] = podns + self.pod_mem[podname]["podname"] = podname + end + return podname +end + +function CPodAlloc:get_pidalloc() + local pods = {} + local dockerids = {} + for net,pidn in pairs(self.name_space) do + if pidn == "self" then pidn = "1" end + + self:switch_ns(pidn) + -- local env = posix.getenv() + -- env["PROC_ROOT"] = self.proc_fs + + stdlib.setenv("PROC_ROOT",self.proc_fs) + local pfile = io.popen("ss -anp","r") + io.input(pfile) + for line in io.lines() do + repeat + local proto,recv,task,pid = string.match(line,"(%S*)%s*%S*%s*(%d*).*users:%S*\"(%S*)\",pid=(%d*)") + if not proto or not recv or not task or not pid then break end + if proto ~="tcp" and proto ~="udp" and proto ~="raw" then break end + + recv = tonumber(recv) + + local dockerid = "" + if not dockerids[pid] then + dockerid = dockerinfo:get_dockerid(pid) + if dockerid == "unknow" then break end + dockerids[pid] = dockerid + else + dockerid = dockerids[pid] + end + + local podname = dockerid + if not pods[dockerid] then + podname = self:get_container_info(dockerid) + pods[dockerid] = podname + else + podname = pods[dockerid] + end + if recv < 1024 and podname == dockerid then break end + + if not self.pod_mem[podname] then + self.pod_mem[podname] = {} + self.pod_mem[podname]["allocpage"] = 0 + self.pod_mem[podname]["podns"] = podname + self.pod_mem[podname]["podname"] = podname + end + self.pod_mem[podname]["allocpage"] = self.pod_mem[podname]["allocpage"] + recv + self.total = self.total + recv + until true + end + pfile:close() + self:switch_ns("1") + stdlib.setenv("PROC_ROOT","") + end +end + +function CPodAlloc:scan_namespace() + local root = self.proc_fs + for pid in dirent.files(root) do + repeat + if pystring:startswith(pid,".") then break end + if not self:file_exists(self.proc_fs .. pid .. "/comm") then break end + + local proc_ns = self.proc_fs .. pid .. 
"/ns/net" + if not self:file_exists(proc_ns) then break end + + local slink = unistd.readlink(proc_ns) + if not slink then break end + if not string.find(slink,"net") then break end + + local inode = string.match(slink,"%[(%S+)%]") + if not inode then break end + + if not self.name_space[inode] then self.name_space[inode] = pystring:strip(pid) end + if not self:file_exists(root .. self.name_space[inode] .. "/comm") then self.name_space[inode] = pystring:strip(pid) end + until true + end +end + +function CPodAlloc:proc(elapsed, lines) + CvProc.proc(self) + self.name_space = {} + self.pod_mem = {} + self.total = 0 + self:scan_namespace() + self:get_pidalloc() + + for k,v in pairs(self.pod_mem) do + local cell = {{name="pod_allocpage", value=v['allocpage']/1024}} + local label = {{name="podname",index=v['podname'],}, {name="namespace",index = v['podns'],},} + self:appendLine(self:_packProto("pod_alloc", label, cell)) + end + local cell = {{name="pod_allocpage_total", value=self.total/1024}} + self:appendLine(self:_packProto("pod_alloc", nil, cell)) + + self:push(lines) +end + +return CPodAlloc diff --git a/source/tools/monitor/unity/collector/proc_arp.lua b/source/tools/monitor/unity/collector/proc_arp.lua new file mode 100644 index 0000000000000000000000000000000000000000..d38c6d3274bb2c1c42274212421fbbcd7c5de7b3 --- /dev/null +++ b/source/tools/monitor/unity/collector/proc_arp.lua @@ -0,0 +1,47 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/7 2:30 PM +--- + +require("common.class") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") + +local CprocArp = class("procArp", CvProc) + +function CprocArp:_init_(proto, pffi, mnt, pFile) + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/net/arp") +end + +function CprocArp:proc(elapsed, lines) + local c = 0 + CvProc.proc(self) + local arps = {} + for line in io.lines(self.pFile) do + if c > 0 then + local cells = pystring:split(line) + if arps[cells[6]] then + arps[cells[6]] = arps[cells[6]] + 1 + else + arps[cells[6]] = 1 + end + end + c = c + 1 + end + + for k, v in pairs(arps) do + local ls = { + name = "dev", + index = k, + } + local vs = { + name = "count", + value = v, + } + self:appendLine(self:_packProto("arp", {ls}, {vs})) + end + self:push(lines) +end + +return CprocArp \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/proc_buddyinfo.lua b/source/tools/monitor/unity/collector/proc_buddyinfo.lua index 8d85696eb6ca22865f8683518ede044cefd3e5fa..e324c0d56c989f2972c2b045cf743453ee8b3e88 100644 --- a/source/tools/monitor/unity/collector/proc_buddyinfo.lua +++ b/source/tools/monitor/unity/collector/proc_buddyinfo.lua @@ -5,22 +5,27 @@ --- require("common.class") -local CkvProc = require("collector.kvProc") local CvProc = require("collector.vproc") local pystring = require("common.pystring") -local CprocBuddyinfo = class("proc_buddyinfo", CkvProc) +local CprocBuddyinfo = class("proc_buddyinfo", CvProc) function CprocBuddyinfo:_init_(proto, pffi, mnt,pFile) - CkvProc._init_(self, proto, pffi, mnt, pFile or "proc/buddyinfo", "buddyinfo") + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/buddyinfo") + self._protoTable = { + line = "buddyinfo", + ls = nil, + vs = {} + } end function CprocBuddyinfo:proc(elapsed, lines) CvProc.proc(self) - buddyinfo = {} + self._protoTable.vs = {} + local buddyinfo = {} for line in io.lines(self.pFile) do if string.find(line,"Normal") then - subline = 
pystring:split(line,"Normal",1)[2] + local subline = pystring:split(line,"Normal",1)[2] for num in string.gmatch(subline, "%d+") do table.insert(buddyinfo,tonumber(num)) end @@ -31,7 +36,7 @@ function CprocBuddyinfo:proc(elapsed, lines) if not buddyinfo then for line in io.lines(self.pFile) do if string.find(line,"DMA32") then - subline = pystring:split(line,"DMA32",1)[2] + local subline = pystring:split(line,"DMA32",1)[2] for num in string.gmatch(subline, "%d+") do table.insert(buddyinfo,tonumber(num)) end @@ -46,7 +51,7 @@ function CprocBuddyinfo:proc(elapsed, lines) end self:appendLine(self._protoTable) - return self:push(lines) + self:push(lines) end return CprocBuddyinfo diff --git a/source/tools/monitor/unity/collector/proc_cgroups.lua b/source/tools/monitor/unity/collector/proc_cgroups.lua new file mode 100644 index 0000000000000000000000000000000000000000..7c95636b37e05f92daf78dd502cac49f0a513e55 --- /dev/null +++ b/source/tools/monitor/unity/collector/proc_cgroups.lua @@ -0,0 +1,40 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/8 2:32 PM +--- + +require("common.class") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") + +local Ccgroups = class("cgroups", CvProc) + +function Ccgroups:_init_(proto, pffi, mnt, pFile) + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/cgroups") +end + +function Ccgroups:proc(elapsed, lines) + local c = 0 + CvProc.proc(self) + local values = {} + local ls = { + name = "type", + index = "num_cgroups", + } + + for line in io.lines(self.pFile) do + if c > 0 then + local cell = pystring:split(line) + values[c - 1] = { + name = cell[1], + value = tonumber(cell[3]) + } + end + c = c + 1 + end + self:appendLine(self:_packProto("cgroups", {ls}, values)) + self:push(lines) +end + +return Ccgroups diff --git a/source/tools/monitor/unity/collector/proc_diskstats.lua b/source/tools/monitor/unity/collector/proc_diskstats.lua index 45badf6b35b2847a7f6d108738d2b856f6525343..a8535414e6e969e6319884bc31581a0c233f4e4b 100644 --- a/source/tools/monitor/unity/collector/proc_diskstats.lua +++ b/source/tools/monitor/unity/collector/proc_diskstats.lua @@ -127,7 +127,7 @@ function CprocDiskstats:proc(elapsed, lines) self:_proc(line, elapsed) end self:checkLastDisks() - return self:push(lines) + self:push(lines) end return CprocDiskstats diff --git a/source/tools/monitor/unity/collector/proc_interrupts.lua b/source/tools/monitor/unity/collector/proc_interrupts.lua new file mode 100644 index 0000000000000000000000000000000000000000..9b66c3447edb5c46caaf01dca5c17098f79cb355 --- /dev/null +++ b/source/tools/monitor/unity/collector/proc_interrupts.lua @@ -0,0 +1,69 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. 
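+--- Pivots /proc/interrupts (a header row of CPUs followed by one row per
+--- interrupt source) into one "interrupts" record per CPU.  Numbered IRQ
+--- rows are keyed by their last column (the device name); named rows such
+--- as NMI: or LOC: are keyed by the lower-cased title.  The magic
+--- sysconf(84) below is _SC_NPROCESSORS_ONLN on Linux/glibc, i.e. the
+--- online-CPU count used as the number of per-CPU columns to read.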
+--- DateTime: 2023/3/8 11:07 AM +--- + +require("common.class") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") +local unistd = require("posix.unistd") + +local Cinterrupts = class("interrupts", CvProc) + +function Cinterrupts:_init_(proto, pffi, mnt, pFile) + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/interrupts") + self._cpus = unistd.sysconf(84) +end + +function Cinterrupts:proc(elapsed, lines) + local c = 0 + CvProc.proc(self) + local ints = {} + for line in io.lines(self.pFile) do + if c > 0 then + ints[c] = pystring:split(pystring:strip(line)) + end + c = c + 1 + end + + local per_irqs = {} + for _, int in ipairs(ints) do + local nums = #int + if nums > self._cpus then + local title = int[1] + local head + if string.match(title, "%d+:") then + head = int[nums] + else + head = string.lower(string.sub(title, 1, -2)) + end + + for i=1, self._cpus do + if not per_irqs[i] then + per_irqs[i] = {} + end + per_irqs[i][head] = tonumber(int[i + 1]) + end + end + end + + for i, irq in ipairs(per_irqs) do + local ls = { + name = "cpu", index = "cpu" .. (i - 1) + } + local c = 1 + local values = {} + for k, v in pairs(irq) do + values[c] = { + name = k, + value = v, + } + c = c + 1 + end + self:appendLine(self:_packProto("interrupts", {ls}, values)) + end + self:push(lines) +end + +return Cinterrupts diff --git a/source/tools/monitor/unity/collector/proc_meminfo.lua b/source/tools/monitor/unity/collector/proc_meminfo.lua index 5a1dd38714034c48883a4ced3937c7fa71a16732..2902cd848882d6ea116aed725452f20eca424184 100644 --- a/source/tools/monitor/unity/collector/proc_meminfo.lua +++ b/source/tools/monitor/unity/collector/proc_meminfo.lua @@ -33,7 +33,7 @@ function CprocMeminfo:readIomem() reserved = reserved + (tonumber(cells[2], 16)-tonumber(cells[1], 16)) end end - self._protoTable_dict["vs"]["res"],_ = math.modf(reserved/1024) + self._protoTable_dict["vs"]["res"], _ = math.modf(reserved/1024) end function CprocMeminfo:readVmalloc() @@ -61,11 +61,11 @@ function CprocMeminfo:readUsed() end function CprocMeminfo:readHugepage(size,name) - file = "/sys/kernel/mm/hugepages/hugepages-" .. size .. "kB/nr_hugepages" + local file = "/sys/kernel/mm/hugepages/hugepages-" .. size .. 
"kB/nr_hugepages" local f=io.open(file,"r") if f ~= nil then - pages = tonumber(f:read("*a")) + local pages = tonumber(f:read("*a")) io.close(f) self._protoTable_dict["vs"][name]=pages * size end @@ -91,7 +91,7 @@ function CprocMeminfo:proc(elapsed, lines) for line in io.lines(self.pFile) do self:readKV(line) end - tmp_dict = self._protoTable_dict.vs + local tmp_dict = self._protoTable_dict.vs local cell = {name="total", value=tmp_dict["MemTotal"]+tmp_dict["res"]} table.insert(self._protoTable["vs"], cell) @@ -156,7 +156,7 @@ function CprocMeminfo:proc(elapsed, lines) table.insert(self._protoTable["vs"], cell) self:appendLine(self._protoTable) - return self:push(lines) + self:push(lines) end diff --git a/source/tools/monitor/unity/collector/proc_mounts.lua b/source/tools/monitor/unity/collector/proc_mounts.lua index 4de16c327bcba74a277713bff7996ea95b40c3e0..2134f8a69e47288af174f9c9ddd834da5e3ea576 100644 --- a/source/tools/monitor/unity/collector/proc_mounts.lua +++ b/source/tools/monitor/unity/collector/proc_mounts.lua @@ -23,8 +23,10 @@ local function get_lines(fName) local fName = fName or "/proc/mounts" local f = assert(io.open(fName, "r")) + local c = 0 for line in f:lines() do - table.insert(lines, line) + c = c + 1 + lines[c] = line end return lines end @@ -79,27 +81,29 @@ end function CprocMounts:_proc() for k, v in pairs(self._mpoints) do local stat = statvfs(k) - local ls = { - { - name = "fs", - index = v, - }, - { - name = "mount", - index = k, - }, - } - local vs = { - { name="f_bsize", value=stat.f_bsize, }, - { name="f_blocks", value=stat.f_blocks, }, - { name="f_bfree", value=stat.f_bfree, }, - { name="f_bavail", value=stat.f_bavail, }, - { name="f_files", value=stat.f_files, }, - { name="f_ffree", value=stat.f_ffree, }, - { name="f_favail", value=stat.f_favail, }, - } - local line = self:_packProto("fs_stat", ls, vs) - self:appendLine(line) + if stat then -- stat may return 0 + local ls = { + { + name = "fs", + index = v, + }, + { + name = "mount", + index = k, + }, + } + local vs = { + { name="f_bsize", value=stat.f_bsize, }, + { name="f_blocks", value=stat.f_blocks, }, + { name="f_bfree", value=stat.f_bfree, }, + { name="f_bavail", value=stat.f_bavail, }, + { name="f_files", value=stat.f_files, }, + { name="f_ffree", value=stat.f_ffree, }, + { name="f_favail", value=stat.f_favail, }, + } + local line = self:_packProto("fs_stat", ls, vs) + self:appendLine(line) + end end end @@ -108,7 +112,7 @@ function CprocMounts:proc(elapsed, lines) CvProc.proc(self) self:_proc() - return self:push(lines) + self:push(lines) end return CprocMounts \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/proc_netdev.lua b/source/tools/monitor/unity/collector/proc_netdev.lua index 7a2f09543dad999dfd1da4b41e020dd027ef0e57..d546ad006e68cc027043d1890121682c0a41840a 100644 --- a/source/tools/monitor/unity/collector/proc_netdev.lua +++ b/source/tools/monitor/unity/collector/proc_netdev.lua @@ -27,7 +27,7 @@ function CprocNetdev:_getNewValue(ifName, data) local now = {} local index = self:_netdevIndex() for i = 1, #index do - table.insert(now, tonumber(data.value[i - 1])) + now[i] = tonumber(data.value[i - 1]) end self._lastData[ifName] = now self._lastIfNames[ifName] = 1 @@ -43,13 +43,13 @@ function CprocNetdev:_calcIf(ifName, data, res, elapsed) } for i, index in ipairs(index) do local nowValue = tonumber(data.value[i -1]) - table.insert(now, nowValue) + now[i] = nowValue local value = (nowValue - res[i]) / elapsed local cell = { name = index, value = value } - 
table.insert(protoTable.vs, cell) + protoTable.vs[i] = cell end self:appendLine(protoTable) @@ -91,7 +91,7 @@ function CprocNetdev:proc(elapsed, lines) i = i + 1 end self:checkLastIfNames() - return self:push(lines) + self:push(lines) end return CprocNetdev diff --git a/source/tools/monitor/unity/collector/proc_snmp_stat.lua b/source/tools/monitor/unity/collector/proc_snmp_stat.lua index 199bdbea6d5f63dfe9b075f44790253a73849845..a9e79e10f3f6956f62fa3d963844cbc45c825622 100644 --- a/source/tools/monitor/unity/collector/proc_snmp_stat.lua +++ b/source/tools/monitor/unity/collector/proc_snmp_stat.lua @@ -58,12 +58,14 @@ end function CprocSnmpStat:pack(labels, logs) local vs = {} + local c = 0 for k, v in pairs(labels) do local value = { name = k, value = tonumber(v) } - table.insert(vs, value) + c = c + 1 + vs[c] = value end self:appendLine(self:_packProto("pkt_status", nil, vs)) if #logs > 0 then @@ -82,6 +84,7 @@ function CprocSnmpStat:check(now) local labels = self:createLabels() local logs = {} if self._rec then + local c = 0 for k, v in pairs(now) do if self._rec[k] and self._rec[k] < v then -- local delta = v - self._rec[k] @@ -91,7 +94,8 @@ function CprocSnmpStat:check(now) labels[lk] = lv + delta end end - table.insert(logs, string.format("%s: %d", k, tonumber(delta))) + c = c + 1 + logs[c] = string.format("%s: %d", k, tonumber(delta)) end end end @@ -105,7 +109,7 @@ function CprocSnmpStat:proc(elapsed, lines) self:_proc(self.pFile .. "proc/net/snmp", now) self:_proc(self.pFile .. "proc/net/netstat", now) self:check(now) - return self:push(lines) + self:push(lines) end return CprocSnmpStat diff --git a/source/tools/monitor/unity/collector/proc_sockstat.lua b/source/tools/monitor/unity/collector/proc_sockstat.lua index d906ae97bf744e556c50846b5a3419d18c012d26..3a0769476c63c3df7579e0d91db822cea730d103 100644 --- a/source/tools/monitor/unity/collector/proc_sockstat.lua +++ b/source/tools/monitor/unity/collector/proc_sockstat.lua @@ -17,6 +17,7 @@ end function CprocSockStat:proc(elapsed, lines) CvProc.proc(self) local vs = {} + local c = 0 for line in io.lines(self.pFile) do local cells = pystring:split(line, ":", 1) if #cells > 1 then @@ -31,12 +32,13 @@ function CprocSockStat:proc(elapsed, lines) name = title, value = tonumber(bodies[2 * i]) } - table.insert(vs, v) + c = c + 1 + vs[c] = v end end end self:appendLine(self:_packProto("sock_stat", nil, vs)) - return self:push(lines) + self:push(lines) end return CprocSockStat diff --git a/source/tools/monitor/unity/collector/proc_softirqs.lua b/source/tools/monitor/unity/collector/proc_softirqs.lua new file mode 100644 index 0000000000000000000000000000000000000000..d8b1f48930bcc562325299d4983c3d86b16cfb7b --- /dev/null +++ b/source/tools/monitor/unity/collector/proc_softirqs.lua @@ -0,0 +1,58 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. 
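+--- Transposes /proc/softirqs (one row per softirq type such as TIMER or
+--- NET_RX, one column per CPU) into a "per_sirqs" record per CPU, keyed by
+--- the lower-cased softirq name.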
+--- DateTime: 2023/3/7 10:08 PM +--- + +require("common.class") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") + +local Csoftirqs = class("softirqs", CvProc) + +function Csoftirqs:_init_(proto, pffi, mnt, pFile) + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/softirqs") +end + +function Csoftirqs:proc(elapsed, lines) + local c = 0 + CvProc.proc(self) + local sirqs = {} + for line in io.lines(self.pFile) do + if c > 0 then + local title, irq_str = unpack(pystring:split(line, ":", 1)) + title = string.lower(pystring:strip(title)) + sirqs[title] = pystring:split(pystring:strip(irq_str)) + end + c = c + 1 + end + + local per_sirqs = {} + for k, irqs in pairs(sirqs) do + for i, v in ipairs(irqs) do + if per_sirqs[i] == nil then + per_sirqs[i] = {} + end + per_sirqs[i][k] = tonumber(v) + end + end + + for i, irq in ipairs(per_sirqs) do + local ls = { + name = "cpu", index = "cpu" .. (i - 1) + } + local c = 1 + local values = {} + for k, v in pairs(irq) do + values[c] = { + name = k, + value = v, + } + c = c + 1 + end + self:appendLine(self:_packProto("per_sirqs", {ls}, values)) + end + self:push(lines) +end + +return Csoftirqs diff --git a/source/tools/monitor/unity/collector/proc_softnet_stat.lua b/source/tools/monitor/unity/collector/proc_softnet_stat.lua new file mode 100644 index 0000000000000000000000000000000000000000..6aa952ea58caec01652d67e5bed90d7797f72c54 --- /dev/null +++ b/source/tools/monitor/unity/collector/proc_softnet_stat.lua @@ -0,0 +1,53 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/7 8:43 PM +--- + +require("common.class") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") + +local CsoftnetStat = class("softnetStat", CvProc) + +-- refer to https://insights-core.readthedocs.io/en/latest/shared_parsers_catalog/softnet_stat.html +function CsoftnetStat:_init_(proto, pffi, mnt, pFile) + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/net/softnet_stat") +end + +function CsoftnetStat:proc(elapsed, lines) + local c = 0 + CvProc.proc(self) + local softnets = {} + for line in io.lines(self.pFile) do + local cell = {} + local cells = pystring:split(line) + cell.packet_process = tonumber(cells[1], 16) + cell.packet_drop = tonumber(cells[2], 16) + cell.time_squeeze = tonumber(cells[3], 16) + cell.cpu_collision = tonumber(cells[9], 16) + cell.received_rps = tonumber(cells[10], 16) + cell.flow_limit_count = tonumber(cells[11], 16) + c = c + 1 + softnets[c] = cell + end + + for i, cpus in ipairs(softnets) do + local ls = { + name = "cpu", index = tostring(i) + } + local c = 1 + local values = {} + for k, v in pairs(cpus) do + values[c] = { + name = k, + value = v, + } + c = c + 1 + end + self:appendLine(self:_packProto("softnets", {ls}, values)) + end + self:push(lines) +end + +return CsoftnetStat diff --git a/source/tools/monitor/unity/collector/proc_stat.lua b/source/tools/monitor/unity/collector/proc_stat.lua index a7096896be758fd600aa9bab2c84ef54b894cb5c..c165a282a5a60ce95c1b9d33b0a6f4dae5c46377 100644 --- a/source/tools/monitor/unity/collector/proc_stat.lua +++ b/source/tools/monitor/unity/collector/proc_stat.lua @@ -185,7 +185,7 @@ function CprocStat:proc(elapsed, lines) end end self:appendLine(self:_packProto("stat_counters", nil, counter, nil)) - return self:push(lines) + self:push(lines) end return CprocStat diff --git a/source/tools/monitor/unity/collector/proc_statm.lua b/source/tools/monitor/unity/collector/proc_statm.lua 
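-- Note: this hunk, like the proc_netdev, proc_snmp_stat and proc_sockstat
-- hunks above and the vproc, lineParse and pystring hunks below, replaces
-- table.insert() with an explicit counter plus indexed assignment.  A minimal
-- sketch of the pattern follows; the function name is illustrative and the
-- motivation (skipping repeated length lookups in hot collector loops) is an
-- assumption, not something the patch states:
local function gather(src)
    local out = {}
    local c = 0
    for _, v in ipairs(src) do
        c = c + 1
        out[c] = v       -- same result as table.insert(out, v)
    end
    return out
end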
index d3ebeb6083b24bff86186f1536c45c3e3684b68c..f1ad61e37e66e7d0dc95a2e8f0b6f64f738e5bf3 100644 --- a/source/tools/monitor/unity/collector/proc_statm.lua +++ b/source/tools/monitor/unity/collector/proc_statm.lua @@ -16,6 +16,7 @@ end function CprocStatm:proc(elapsed, lines) CvProc.proc(self) local heads = {"size", "resident", "shared", "text", "lib", "data", "dt"} + local c = 0 for line in io.lines("/proc/self/statm") do local vs = {} local data = self._ffi.new("var_long_t") @@ -26,12 +27,13 @@ function CprocStatm:proc(elapsed, lines) name = k, value = tonumber(data.value[i - 1]), } - table.insert(vs, cell) + c = c + 1 + vs[c] = cell end self:appendLine(self:_packProto("self_statm", nil, vs)) end - return self:push(lines) + self:push(lines) end return CprocStatm diff --git a/source/tools/monitor/unity/collector/proc_uptime.lua b/source/tools/monitor/unity/collector/proc_uptime.lua new file mode 100644 index 0000000000000000000000000000000000000000..4d9d778b4f4bca7ad60e2a5a9119aafdcaa21a46 --- /dev/null +++ b/source/tools/monitor/unity/collector/proc_uptime.lua @@ -0,0 +1,90 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/1 3:13 PM +--- + +require("common.class") +local utsname = require("posix.sys.utsname") +local CvProc = require("collector.vproc") + +local CprocUptime = class("procUptime", CvProc) + +function CprocUptime:_init_(proto, pffi, mnt, pFile) + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/uptime") + local distro, s, errno = utsname.uname() + if distro then + self._labels = { + {name = "sysname", index = distro.sysname}, + {name = "nodename", index = distro.nodename}, + {name = "release", index = distro.release}, + {name = "version", index = distro.version}, + {name = "machine", index = distro.machine}, + } + else + error(string.format("read uname get %s, errno %d"), s, errno) + end + self._release = mnt .. 
"etc/system-release" + self._counter = 60 * 60 +end + +local function readNum(pFile) + local f = io.open(pFile,"r") + local res1, res2 = -1, -1 + if f then + res1, res2 = f:read("*n"), f:read("*n") + f:close() + end + return res1, res2 +end + +local function readUname() + local distro, s, errno = utsname.uname() + if distro then + return { + {name = "sysname", index = distro.sysname}, + {name = "nodename", index = distro.nodename}, + {name = "release", index = distro.release}, + {name = "version", index = distro.version}, + {name = "machine", index = distro.machine}, + } + else + error(string.format("read uname get %s, errno %d"), s, errno) + end +end + +local function readRelease(pFile) + local f = io.open(pFile) + local res = "unknown" + if f then + res = f:read() + f:close() + end + return res +end + +function CprocUptime:proc(elapsed, lines) + CvProc.proc(self) + local uptime, idletime = readNum(self.pFile) + local vs = { + {name = "uptime", value = uptime}, + {name = "idletime", value = idletime}, + {name = "stamp", value = os.time()}, + } + self:appendLine(self:_packProto("uptime", nil, vs)) + + local totalTime = elapsed * self._counter + if totalTime >= 10 * 60 then -- report by hour + local dummyValue = {{name = "dummy", value=1.0}} + local labels = readUname() + self:appendLine(self:_packProto("uname", labels, dummyValue)) + local releaseInfo = {{name = "release", index = readRelease(self._release)}} + self:appendLine(self:_packProto("system_release", releaseInfo, dummyValue)) + self._counter = 0 + else + self._counter = self._counter + 1 + end + self:push(lines) +end + +return CprocUptime diff --git a/source/tools/monitor/unity/collector/vproc.lua b/source/tools/monitor/unity/collector/vproc.lua index 33c016a5551bd4dff1cecd579e9e2272c80cdb05..05024bc8e2ebfd9a608aa97f838ffdd6324b7366 100644 --- a/source/tools/monitor/unity/collector/vproc.lua +++ b/source/tools/monitor/unity/collector/vproc.lua @@ -31,11 +31,12 @@ function CvProc:copyLine(line) end function CvProc:push(lines) - for _, v in ipairs(self._lines["lines"]) do - table.insert(lines["lines"], v) + local c = #lines["lines"] -- not for #lines + for _, line in ipairs(self._lines["lines"]) do + c = c + 1 + lines["lines"][c] = line end self._lines = nil - return lines end function CvProc:_packProto(head, labels, vs, log) diff --git a/source/tools/monitor/unity/common/dockerinfo.lua b/source/tools/monitor/unity/common/dockerinfo.lua new file mode 100644 index 0000000000000000000000000000000000000000..9bbce2ced08f0681ad39532d23692996b615f81b --- /dev/null +++ b/source/tools/monitor/unity/common/dockerinfo.lua @@ -0,0 +1,104 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liuxinwnei. 
+--- DateTime: 2023/02/08 17:00 PM +--- + +dockerinfo = {} +local posix = require("posix") +local cjson = require("cjson") +local json = cjson.new() +local pystring = require("common.pystring") +local stat = require("posix.sys.stat") + +function file_exists(file) + local f=stat.lstat(file) + if f ~= nil then + return true + else + return false + end +end + +function dockerinfo:get_hostfs() + local proc_fs="/mnt/host/proc/" + local sys_fs="/mnt/host/sys/" + local pods_fs="/mnt/host/var/lib/kubelet/pods/" + local root_fs = "/mnt/host/" + if file_exists(proc_fs) then + return proc_fs, sys_fs, pods_fs, root_fs + end + proc_fs="/proc/" + sys_fs="/sys/" + pods_fs="/var/lib/kubelet/pods/" + root_fs = "/" + return proc_fs, sys_fs, pods_fs, root_fs +end + +function get_runtimesock() + local root_fs = "" + _, _, _, root_fs = dockerinfo:get_hostfs() + local runtime = "docker" + local runtime_sock = root_fs .. "var/run/docker.sock" + local sock={"var/run/docker.sock","run/containerd/containerd.sock", "var/run/dockershim.sock"} + for _,runtimex in pairs(sock) do + if file_exists(root_fs .. runtimex) then + runtime_sock = root_fs .. runtimex + if not string.find(runtime_sock,"docker.sock") then + runtime = "crictl" + end + end + end + return runtime,runtime_sock +end + +function dockerinfo:get_inspect(did) + local runtime,runtime_sock = get_runtimesock() + if runtime == "docker" then + return get_container_inspect(did) + else + return get_crictl_inspect(did) + end +end + +function dockerinfo:get_dockerid(pid) + local proc_fs = dockerinfo:get_hostfs() + local idstring = "unknow" + if not file_exists(proc_fs .. pid .. "/cgroup") then return idstring end + local cmd = "cat " .. proc_fs .. pid .. "/cgroup 2>/dev/null | grep memory:" + local pfile = io.popen(cmd,"r") + local res = pfile:read("*a") + pfile:close() + + if not string.find(res,"kubepods") and not string.find(res,"docker%-") then return idstring end + if string.find(res,"docker%-") then + idstring = pystring:split(res,"docker-")[2] + elseif string.find(res,"cri%-containerd%-") then + idstring = pystring:split(res,"cri-containerd-")[2] + else + local tmp = pystring:split(res,"/",10) + idstring = tmp[#tmp] + end + idstring = string.sub(idstring,0,8) + return idstring +end + +function get_container_inspect(did) + local runtime, runtime_sock = get_runtimesock() + local cmd = "curl --silent -XGET --unix-socket " .. runtime_sock .. " http://localhost/containers/" .. did .. "/json 2>/dev/null " + local f = io.popen(cmd,"r") + local res = f:read("*a") + f:close() + return res +end + +function get_crictl_inspect(did) + local runtime, runtime_sock = get_runtimesock() + local cmd = runtime .. " -r " .. runtime_sock .. " inspect " .. did .. " 2>/dev/null " + local f = io.popen(cmd,"r") + local res = f:read("*a") + f:close() + return res +end + +return dockerinfo diff --git a/source/tools/monitor/unity/common/lineParse.lua b/source/tools/monitor/unity/common/lineParse.lua index ee8de957297ffb94db912e101677128c09dfaa2d..33d6486a0e831608f41ce0956c6e87539b1be50e 100644 --- a/source/tools/monitor/unity/common/lineParse.lua +++ b/source/tools/monitor/unity/common/lineParse.lua @@ -81,18 +81,23 @@ function module.pack(title, ls, vs) local line = title if system:keyCount(ls) > 0 then local lss = {} + local c = 0 for k, v in pairs(ls) do - table.insert(lss, k .. "=" .. v) + c = c + 1 + lss[c] = table.concat({k, v}, "=") end line = line .. ',' .. 
pystring:join(",", lss) end local vss = {} + local c = 0 for k, v in pairs(vs) do local tStr = type(v) if tStr == "number" then - table.insert(vss, k .. '=' .. tostring(v)) + c = c + 1 + vss[c] = table.concat({k, tostring(v)}, "=") elseif tStr == "string" then - table.insert(vss, k .. '=' .. json.encode(v)) + c = c + 1 + vss[c] = table.concat({k, json.encode(v)}, "=") else error("bad value type for " .. tStr) end diff --git a/source/tools/monitor/unity/common/lmd.lua b/source/tools/monitor/unity/common/lmd.lua index 67a0e1bf8693ec77bb142e8b913530e272ade0b6..bb22f611e6767a9a603ebb4562473e5affc20d31 100644 --- a/source/tools/monitor/unity/common/lmd.lua +++ b/source/tools/monitor/unity/common/lmd.lua @@ -10,6 +10,7 @@ require("common.class") local pystring = require("common.pystring") local Clmd = class("lmd") +local srcPath = "" function Clmd:_init_() self._escs = '\\`*_{}[]()>#+-.!' @@ -98,9 +99,24 @@ local function pCode(s) end end +local function images(s) + local name, link = unpack(pystring:split(s, "](", 1)) + name = string.sub(name, 3) -- ![]() + link = string.sub(link, 1, -2) + + if string.sub(name, -1, -1) == "\\" then + return s + end + if string.sub(link, -1, -1) == "\\" then + return s + end + local path = srcPath .. link + return string.format('%s', path, name) +end + local function links(s) local name, link = unpack(pystring:split(s, "](", 1)) - name = string.sub(name, 2) + name = string.sub(name, 2) -- []() link = string.sub(link, 1, -2) if string.sub(name, -1, -1) == "\\" then return s @@ -111,6 +127,10 @@ local function links(s) return string.format('%s', link, name) end +local function pImages(s) + return string.gsub(s, "!%[.-%]%(.-%)", function(s) return images(s) end) +end + local function pLink(s) return string.gsub(s, "%[.-%]%(.-%)", function(s) return links(s) end) end @@ -121,7 +141,7 @@ local function Quotes(quotes, res) local level = 1 for i = start, len do local levels, body = unpack(pystring:split(quotes[i], " ", 1)) - local v = string.len(levels) + local v = #levels if v > level then while v > level do table.insert(res, "
") @@ -353,6 +373,7 @@ function Clmd:seg(s) s = pItalic(s) s = pDelete(s) s = pCode(s) + s = pImages(s) s = pLink(s) return pEscape(s) end @@ -362,11 +383,12 @@ function Clmd:pSeg(s) return pystring:join("", {"
<p>
", pEnter(s), "
</p>
"}) end -function Clmd:toHtml(md) +function Clmd:toHtml(md, path) local mds = pystring:split(md, '\n') local res = {} local len = #mds local stop = 0 + srcPath = path or "" for i = 1, len do local line = mds[i] diff --git a/source/tools/monitor/unity/common/pystring.lua b/source/tools/monitor/unity/common/pystring.lua index 25278335fd9d1003e143608efe260766908b6e2b..a499ee85cafe28c4bc836128f696a2595f802e8f 100644 --- a/source/tools/monitor/unity/common/pystring.lua +++ b/source/tools/monitor/unity/common/pystring.lua @@ -40,12 +40,9 @@ local function newStack() return stack end -local function checkDelimiter(ch) - local s = "().%+-*?[]^$" - if ch == " " then - return "%s" - end - for c in string.gmatch(s, ".") do +local luaReReserve = "().%+-*?[]^$" +local function checkLuaReReserve(ch) + for c in string.gmatch(luaReReserve, ".") do if c == ch then return "%" .. ch end @@ -55,8 +52,14 @@ end local function setupDelimiter(delimiter) local rt = {} + local i = 0 for c in string.gmatch(delimiter, ".") do - table.insert(rt, checkDelimiter(c)) + i = i + 1 + if c == " " then + rt[i] = "%s" + else + rt[i] = checkLuaReReserve(c) + end end return table.concat(rt) end @@ -71,31 +74,284 @@ local function setupPatten(s) return patten end +local function _setupRepl(repl) + local rt = {} + local i = 0 + for c in string.gmatch(repl, ".") do + i = i + 1 + rt[i] = checkLuaReReserve(c) + end + return table.concat(rt) +end + +local function setupRepl(s) + if s == nil then + error("repl must be a string.") + else + return _setupRepl(s) + end +end + +function pystring:shift(s, n) -- position for right, negative for left + local len = #s + if len == 0 then + return s + end + n = n % len + if n == 0 then + return s + elseif n > 0 then -- "abcd >> 1" + local offset = len - n + local s1 = string.sub(s, offset + 1) + local s2 = string.sub(s, 1, offset) + return s1 .. s2 + else -- "abcd << 1" + local offset = len + n + local s1 = string.sub(s, offset + 1) + local s2 = string.sub(s, 1, offset) + return s2 .. 
s1 + end +end + +function pystring:islower(s) + local match = string.match(s, "[%l%s%p]+") + if not match then + return false + end + return #match == #s +end + +function pystring:isupper(s) + local match = string.match(s, "[%u%s%p]+") + if not match then + return false + end + return #match == #s +end + +function pystring:isdigit(s) + local match = string.match(s, "%d+") + if not match then + return false + end + return #match == #s +end + +function pystring:ishex(s) + local match = string.match(s, "%x+") + if not match then + return false + end + return #match == #s +end + +function pystring:isalnum(s) + local match = string.match(s, "%w+") + if not match then + return false + end + return #match == #s +end + +function pystring:istilte(s) + local match = string.match(s, "%u%l*") + if not match then + return false + end + return #match == #s +end + +function pystring:isfloat(s) + local dotCnt = 0 + local ascDot = string.byte(".") + local asc0, asc9 = string.byte("0"), string.byte("9") + for i = 1, #s do + local ch = s:byte(i) + if ch == ascDot then + dotCnt = dotCnt + 1 + if dotCnt > 1 then + return false + end + elseif ch > asc9 or ch < asc0 then + return false + end + end + return true +end + +function pystring:lower(s) + return string.lower(s) +end + +function pystring:casefold(s) + return string.lower(s) +end + +function pystring:upper(s) + return string.upper(s) +end + +function pystring:swapcase(s) + local swaps = {} + local ascA, ascZ, asc_a, asc_z = string.byte('A'), string.byte('Z'), string.byte('a'), string.byte('z') + for i=1, #s do + local ch = string.byte(s, i) + if ch >= ascA and ch <= ascZ then + swaps[i] = string.char(ch + 32) + elseif ch >= asc_a and ch <= asc_z then + swaps[i] = string.char(ch - 32) + else + swaps[i] = string.char(ch) + end + end + return table.concat(swaps) +end + +function pystring:capitalize(s) + if #s < 1 then + return s + end + local s1 = string.sub(s, 1, 1) + local s2 = string.sub(s, 2) + return string.upper(s1) .. s2 +end + +function pystring:title(s) + if #s < 1 then + return s + end + local ss = pystring:split(s, " ") + for i = 1, #ss do + ss[i] = pystring:capitalize(ss[i]) + end + return table.concat(ss, " ") +end + +function pystring:capwords(s) + local lines = pystring:split(s, "\n") + local rLines = {} + for i, line in ipairs(lines) do + local rWords = {} + local words = pystring:split(line, " ") + for j, word in ipairs(words) do + rWords[j] = pystring:capitalize(word) + end + rLines[i] = table.concat(rWords, " ") + end + return table.concat(rLines, "\n") +end + +function pystring:ljust(s, len, ch) + ch = ch or " " + if #ch ~= 1 then + error("pad string master a single word, not " .. ch) + end + local delta = len - #s + if delta > 0 then + local pad = string.rep(ch, delta) + return pad .. s + else + return s + end +end + +function pystring:rjust(s, len, ch) + ch = ch or " " + if #ch ~= 1 then + error("pad string master a single word, not " .. ch) + end + local delta = len - #s + if delta > 0 then + local pad = string.rep(ch, delta) + return s .. pad + else + return s + end +end + +function pystring:center(s, len, ch) + ch = ch or " " + if #ch ~= 1 then + error("pad string master a single word, not " .. 
ch) + end + local delta = len - #s + if delta > 0 then + local left = math.floor(delta / 2) + local right = delta - left + + local res = {string.rep(ch, left), s, string.rep(ch, right)} + return table.concat(res) + else + return s + end +end + +function pystring:zfill(s, len) + return pystring:ljust(s, len, "0") +end + function pystring:split(s, delimiter, n) local result = {} + if not delimiter or delimiter == "" then -- for blank, gsub multi blank to single + s = string.gsub(s, "%s+", " ") + end local delimiter = setupDelimiter(delimiter or " ") local n = n or 2 ^ 63 - 1 local nums = 0 local beg = 1 + local c = 0 while (true) do local iBeg, iEnd = string.find(s, delimiter, beg) if (iBeg) then - table.insert(result, string.sub(s, beg, iBeg - 1)) + c = c + 1 + result[c] = string.sub(s, beg, iBeg - 1) beg = iEnd + 1 nums = nums + 1 if nums >= n then - table.insert(result, string.sub(s, beg, string.len(s))) + c = c + 1 + result[c] = string.sub(s, beg, #s) break end else - table.insert(result, string.sub(s, beg, string.len(s))) + c = c + 1 + result[c] = string.sub(s, beg, #s) break end end return result end +function pystring:partition(s, del) + local result = {} + del = del or " " + local delimiter = setupDelimiter(del) + local iBeg, iEnd = string.find(s, delimiter) + if iBeg then + result[1] = string.sub(s, 1, iBeg - 1) + result[2] = del + result[3] = string.sub(s, iEnd + 1) + return result + else + return nil + end +end + +function pystring:partition(s, del) + local result = {} + del = del or " " + local delimiter = setupDelimiter(del) + local iBeg, iEnd = string.find(s, delimiter) + if iBeg then + result[1] = string.sub(s, 1, iBeg - 1) + result[2] = del + result[3] = string.sub(s, iEnd + 1) + return result + else + return nil + end +end + function pystring:reverseTable(t) local n = #t for i = 1, n / 2 do @@ -106,25 +362,29 @@ end function pystring:rsplit(s, delimiter, n) local result = {} local n = n or 2 ^ 63 - 1 - local len = string.len(s) + 1 + local len = #s + 1 local rs = string.reverse(s) local rDel = string.reverse(delimiter or " ") rDel = setupDelimiter(rDel) local nums = 0 local beg = 1 + local c = 0 while (true) do local iBeg, iEnd = string.find(rs, rDel, beg) if (iBeg) then - table.insert(result, string.sub(s, len - (iBeg - 1),len - beg)) + c = c + 1 + result[c] = string.sub(s, len - (iBeg - 1),len - beg) beg = iEnd + 1 nums = nums + 1 if nums >= n then - table.insert(result, string.sub(s, 1, len - beg)) + c = c + 1 + result[c] = string.sub(s, 1, len - beg) break end else - table.insert(result, string.sub(s, 1, len - beg)) + c = c + 1 + result[c] = string.sub(s, 1, len - beg) break end end @@ -133,6 +393,29 @@ function pystring:rsplit(s, delimiter, n) return result end +function pystring:rpartition(s, del) + local result = {} + del = del or " " + local rs = string.reverse(s) + local rDel = string.reverse(del) + local delimiter = setupDelimiter(rDel) + local len = #s + + local iBeg, iEnd = string.find(rs, delimiter) + if iBeg then + result[1] = string.sub(s, 1, len - iBeg + 1 - #del) + result[2] = del + result[3] = string.sub(s, len - iEnd + 1 + #del) + return result + else + return nil + end +end + +function pystring:splitlines(s) + return pystring:split(s, '\n') +end + function pystring:lstrip(s, chars) local patten = "^" .. setupPatten(chars) .. 
"+" local _, ends = string.find(s, patten) @@ -163,15 +446,73 @@ function pystring:join(delim, strings) end function pystring:startswith(s1, s2) - return string.sub(s1,1,string.len(s2)) == s2 + return string.sub(s1,1, #s2) == s2 end function pystring:endswith(s1, s2) - return s2=='' or string.sub(s1,-string.len(s2)) == s2 + return s2 == '' or string.sub(s1,-#s2) == s2 +end + +function pystring:find(s1, s2, start, stop) + start = start or 1 + stop = stop or -1 + s1 = string.sub(s1, start, stop) + local res = string.find(s1, s2, 1, false) + return res or -1 +end + +function pystring:rfind(s1, s2, start, stop) + start = start or 1 + stop = stop or -1 + s1 = string.sub(s1, start, stop) + + local len = #s1 + local lFind = #s2 + local rs1, rs2 = string.reverse(s1), string.reverse(s2) + local i = string.find(rs1, rs2, 1, false) + if i then + return len - i - lFind + 1 + else + return -1 + end +end + +function pystring:index(s1, s2, start, stop) + local res = pystring:find(s1, s2, start, stop) + if res < 0 then + error(s2 .. " is not in " .. s1) + end + return res +end + +function pystring:rindex(s1, s2, start, stop) + local res = pystring:rfind(s1, s2, start, stop) + if res < 0 then + error(s2 .. " is not in " .. s1) + end + return res +end + +function pystring:count(s, find) + local i = 0 + local patten = setupPatten(find) + for _ in string.gmatch(s, patten) do + i = i + 1 + end + return i +end + +function pystring:replace(s, find, repl, n) + local patten = setupPatten(find) + repl = setupRepl(repl) + + return string.gsub(s, patten, repl, n) end -function pystring:find(s1, s2) - return string.find(s1, s2, 1, false) +function pystring:expandtabs(s, tabs) + tabs = tabs or 4 + local repl = string.rep(" ", tabs) + return string.gsub(s, "\t", repl) end return pystring diff --git a/source/tools/monitor/unity/common/system.lua b/source/tools/monitor/unity/common/system.lua index 1c083430d397f0f951fae47ee0afa152355510eb..e2173ded1633840f4a20c4029695d84dfd281a96 100644 --- a/source/tools/monitor/unity/common/system.lua +++ b/source/tools/monitor/unity/common/system.lua @@ -89,6 +89,20 @@ function system:dictCopy(tbl) return cp end +function system:listMerge(...) + local res = {} + local i = 1 + for _, vt in ipairs{...} do + if type(vt) == "table" then + for _, v in ipairs(vt) do + res[i] = v + i = i + 1 + end + end + end + return res +end + function system:hex2ups(hex) return (string.gsub(hex, ".", function (c) return string.format("%02X", string.byte(c)) @@ -111,6 +125,29 @@ function system:hexdump(buf) end end +local htmlRep = { + ["<"] = function() return "<" end, + [">"] = function() return ">" end, + ["&"] = function() return "&" end, + ['"'] = function() return """ end, + ["\t"] = function() return " " end, +} +local function esc_html(s) + return htmlRep[s]() +end +function system:escHtml(s) + local reHtml = '[<>&"\t]' + return string.gsub(s, reHtml, function(s) return esc_html(s) end) +end + +local function esc_md(s) + return "\\" .. s +end +function system:escMd(s) + local reFmt = "[\\`%*_%{%}%[%]%(%)#%+%-%.!|]" + return string.gsub(s, reFmt, function(s) return esc_md(s) end) +end + function system:timeRfc1123(t) t = t or os.time() return os.date("!%a, %d %b %Y %H:%M:%S GMT", t) @@ -119,10 +156,18 @@ end function system:parseYaml(fYaml) local lyaml = require("lyaml") local f = io.open(fYaml,"r") + if not f then + error("file: " .. fYaml .. " is not exist.") + end local s = f:read("*all") f:close() return lyaml.load(s) end +function system:posixError(msg, err, errno) + local s = msg .. 
string.format(": %s, errno: %d", err, errno) + error(s) +end + return system \ No newline at end of file diff --git a/source/tools/monitor/unity/guide/oop/base.lua b/source/tools/monitor/unity/guide/oop/base.lua deleted file mode 100644 index b118f20368bf97ae79c201b01a4ddb1614fd749e..0000000000000000000000000000000000000000 --- a/source/tools/monitor/unity/guide/oop/base.lua +++ /dev/null @@ -1,22 +0,0 @@ ---- ---- Generated by EmmyLua(https://github.com/EmmyLua) ---- Created by liaozhaoyan. ---- DateTime: 2022/12/16 10:23 AM ---- -require("class") - -local Cbase = class("base") - -function Cbase:_init_(name) - self.name = name -end - -function Cbase:hello() - return self.name -end - -function Cbase:_del_() - print("base del..." .. self.name) -end - -return Cbase diff --git a/source/tools/monitor/unity/guide/oop/one.lua b/source/tools/monitor/unity/guide/oop/one.lua deleted file mode 100644 index 1e432f0ccc0f771e4c0a77711b75bb973237253e..0000000000000000000000000000000000000000 --- a/source/tools/monitor/unity/guide/oop/one.lua +++ /dev/null @@ -1,20 +0,0 @@ ---- ---- Generated by EmmyLua(https://github.com/EmmyLua) ---- Created by liaozhaoyan. ---- DateTime: 2022/12/16 10:20 AM ---- - -require("class") -local Cbase = require("base") - -Cone = class("one", Cbase) - -function Cone:_init_(name) - Cbase._init_(self, name) -end - -function Cone:say() - print("one say " .. self.name) -end - -return Cone diff --git a/source/tools/monitor/unity/guide/oop/three.lua b/source/tools/monitor/unity/guide/oop/three.lua deleted file mode 100644 index cae5dd7371f0c2492015b05011d079ad63fd8007..0000000000000000000000000000000000000000 --- a/source/tools/monitor/unity/guide/oop/three.lua +++ /dev/null @@ -1,24 +0,0 @@ ---- ---- Generated by EmmyLua(https://github.com/EmmyLua) ---- Created by liaozhaoyan. ---- DateTime: 2022/12/16 1:07 PM ---- - -require("class") -local Cone = require("one") - -CThree = class("three", Cone) - -function CThree:_init_(name) - Cone._init_(self, name) - self._child = Cone.new("child") -end - - -function CThree:say() - print("three say " .. self.name) - print("child say.") - self._child:say() -end - -return CThree \ No newline at end of file diff --git a/source/tools/monitor/unity/guide/oop/tobj.lua b/source/tools/monitor/unity/guide/oop/tobj.lua deleted file mode 100644 index 356c9ece70f37cef290499988c202fec121c714b..0000000000000000000000000000000000000000 --- a/source/tools/monitor/unity/guide/oop/tobj.lua +++ /dev/null @@ -1,25 +0,0 @@ ---- ---- Generated by EmmyLua(https://github.com/EmmyLua) ---- Created by liaozhaoyan. ---- DateTime: 2022/12/16 10:35 AM ---- -package.path = package.path .. ";../../common/?.lua;" - -local Cone = require("one") -local Ctwo = require("two") -local CThree = require("three") - -local one = Cone.new("1one") -local two = Ctwo.new("2two") -local three = CThree.new("3three") - -assert(one:hello() == "1one") -assert(two:hello() == "2two") -assert(three:hello() == "3three") - -one:say() -two:say() -three:say() - -one = nil -two:say() diff --git a/source/tools/monitor/unity/guide/oop/two.lua b/source/tools/monitor/unity/guide/oop/two.lua deleted file mode 100644 index 8c8de5b04c5d62d577de0a023ae9e862daa7b791..0000000000000000000000000000000000000000 --- a/source/tools/monitor/unity/guide/oop/two.lua +++ /dev/null @@ -1,22 +0,0 @@ ---- ---- Generated by EmmyLua(https://github.com/EmmyLua) ---- Created by liaozhaoyan. 
---- DateTime: 2022/12/16 10:33 AM ---- ---- -require("class") -local Cone = require("one") - -CTwo = class("two", Cone) - -function CTwo:_init_(name) - Cone._init_(self, name) -end - -function CTwo:say() - print("two say " .. self.name) - print("super") - Cone.say(self) -end - -return CTwo \ No newline at end of file diff --git a/source/tools/monitor/unity/httplib/dockerApi.lua b/source/tools/monitor/unity/httplib/dockerApi.lua new file mode 100644 index 0000000000000000000000000000000000000000..ee7c395b3aad72d0d3043cea4e0dd375de86914b --- /dev/null +++ b/source/tools/monitor/unity/httplib/dockerApi.lua @@ -0,0 +1,489 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/10 12:00 AM +--- + +local client = require("http.client") +local headers = require("http.headers") +local util = require("http.util") + +local cjson = require("cjson.safe") +local basexx = require("basexx") + +local handle_response_body = function (body) + if type(body) == 'string' then + local res = cjson.decode(body) + return res ~= nil and res or body + else + return nil + end +end + +local validate_instance = function (instance) + if type(instance) ~= 'table' then + return nil, "not a table" + end + + if instance.host == nil then + return nil, "missing host" + end + + if instance.path == nil then + return nil, "missing path" + end + + if instance.version == nil then + return nil, "missing version" + end + + return true +end + +local perform_request = function (instance, method, endpoint, query, authority, body) + local response_body + local response_headers + local err, errn + local wh_failure, wb_failure + local connection, stream + local instance_check + + instance_check, err = validate_instance(instance) + + if instance_check == nil then + return instance_check, err + end + + if endpoint == nil then + return endpoint, "endpoint not defined" + end + + if type(endpoint) ~= 'string' then + return nil, "endpoint should be a string" + end + + connection, err, errn = client.connect { + host = instance.host, + path = instance.path, + version = 1.1, + sendname = true, + port = 80, + tls = false + } + + -- error while making connection + + if connection == nil then + return connection, err, errn + end + + stream = connection:new_stream() + + -- error while creating stream + + if stream == nil then + return stream, err, errn + end + + -- prepare headers + + local h = headers.new() + + h:append(':method', method or 'GET') + + -- HTTP 1.1 seems to require this header + + h:append(':authority', '') + + h:append(':path', string.format( + '/%s%s%s', + instance.version, + endpoint, + (type(query) == 'table') and '?' .. 
util.dict_to_query(query) or '' + )) + + h:append('content-type', body and 'application/json' or 'text/plain') + h:append('user-agent', 'lua-docker') + + -- docker uses a custom authority header + + if authority then + local json_authority, e = cjson.encode(authority) + if json_authority == nil then + return nil, e, nil + end + local base64_encoded_authority = basexx.to_base64(json_authority) + h:append('X-Registry-Auth', base64_encoded_authority) + end + + local encoded_body, e + + if body ~= nil then + if type(body) == 'table' then + encoded_body, e = cjson.encode(body) + if encoded_body == nil then + return nil, e, nil + end + else + encoded_body = tostring(body) + end + h:append('content-length', tostring(#encoded_body)) + end + + -- write data to stream + + local end_after_headers = true + + if body then end_after_headers = false end + + wh_failure, err, errn = stream:write_headers(h, end_after_headers) + + -- error while writing headers to stream + + if wh_failure == nil then + return wh_failure, err, errn + end + + if body then + wb_failure, err, errn = stream:write_body_from_string(encoded_body) + + -- error while writing body to stream + + if wb_failure == nil then + return wb_failure, err, errn + end + end + + -- read response + + response_headers, err, errn = stream:get_headers() + + -- error getting response headers + + if response_headers == nil then + return response_headers, err, errn + end + + + response_body, err, errn = stream:get_body_as_string() + + -- error getting response body + + if response_body == nil then + return response_body, err, errn + end + + -- successfull response + + return { + body = handle_response_body(response_body), + headers = response_headers, + status = tonumber(response_headers:get(':status')) + } +end + +local loop_through_entity_endpoints = function (endpoint_data, group, target_table) + for k, v in pairs(endpoint_data) do + target_table[k] = function (self, name_or_id, query, authority, body) + return perform_request( + self, v.method, + string.format( + '/%s/%s%s', group, name_or_id, + v.endpoint and ('/' .. 
v.endpoint) or '' + ), + query, + authority, + body + ) + end + end +end + +-- @todo handle streaming responses +-- example: functions which return logs +-- also provide a streaming variant +-- those endpoints have a bool follow +-- query parameter set to true + +return { + new = function (host, path, version) + local d = { + host = host or 'localhost', + path = path or '/var/run/docker.sock', + version = version or 'v1.38', + + custom = perform_request, + + get_version = function (self) + return perform_request(self, 'GET', '/version') + end, + + list_containers = function (self, query) + return perform_request(self, 'GET', '/containers/json', query) + end, + + create_container = function (self, query, body) + return perform_request(self, 'POST', '/containers/create', query, nil, body) + end, + + update_container = function (self, name_or_id, body) + return perform_request( + self, 'POST', + string.format('/containers/%s/%s', name_or_id, 'update'), + nil, nil, body + ) + end, + + delete_stopped_containers = function (self, query) + return perform_request(self, 'POST', '/containers/prune', query) + end, + + -- @todo missing endpoints: + -- export_container + -- get_container_stats + -- attach_to_container + -- attach_to_container_ws + -- extract_archive_to_container_dir + + list_images = function (self, query) + return perform_request(self, 'GET', '/images/json', query) + end, + + delete_builder_cache = function (self) + return perform_request(self, 'POST', '/build/prune') + end, + + create_image = function (self, query, auth, body) + return perform_request(self, 'POST', '/images/create', query, auth, body) + end, + + search_image = function (self, query) + return perform_request(self, 'GET', '/images/search', query) + end, + + delete_unused_images = function (self, query) + return perform_request(self, 'POST', '/images/prune', query) + end, + + create_image_from_container = function (self, query, body) + return perform_request(self, 'POST', '/commit', query, nil, body) + end, + + -- @todo missing endpoints: + -- build_image + -- export_image + -- export_images + -- import_images + + list_networks = function (self, query) + return perform_request(self, 'GET', '/networks', query) + end, + + create_network = function (self, body) + return perform_request(self, 'POST', '/networks/create', nil, nil, body) + end, + + delete_unused_networks = function (self, query) + return perform_request(self, 'POST', '/networks/prune', query) + end, + + list_volumes = function (self, query) + return perform_request(self, 'GET', '/volumes', query) + end, + + create_volume = function (self, body) + return perform_request(self, 'POST', '/volumes/create', nil, nil, body) + end, + + delete_unused_volumes = function (self, query) + return perform_request(self, 'POST', '/volumes/prune', query) + end, + + inspect_swarm = function (self) + return perform_request(self, 'GET', '/swarm') + end, + + initialize_swarm = function (self, body) + return perform_request(self, 'POST', '/swarm/init', nil, nil, body) + end, + + join_swarm = function (self, body) + return perform_request(self, 'POST', '/swarm/join', nil, nil, body) + end, + + leave_swarm = function (self, query) + return perform_request(self, 'POST', '/swarm/leave', query) + end, + + update_swarm = function (self, query, body) + return perform_request(self, 'POST', '/swarm/update', query, nil, body) + end, + + get_swarm_unlockkey = function (self) + return perform_request(self, 'GET', '/swarm/unlockkey') + end, + + unlock_swarm_manager = function (self, body) + 
return perform_request(self, 'POST', '/swarm/unlock', nil, nil, body) + end, + + list_nodes = function (self, query) + return perform_request(self, 'GET', '/nodes', query) + end, + + list_services = function (self, query) + return perform_request(self, 'GET', '/services', query) + end, + + create_service = function (self, auth, body) + return perform_request(self, 'POST', '/services/create', nil, auth, body) + end, + + list_tasks = function (self, query) + return perform_request(self, 'GET', '/tasks', query) + end, + + list_secrets = function (self, query) + return perform_request(self, 'GET', '/secrets', query) + end, + + create_secret = function (self, body) + return perform_request(self, 'POST', '/secrets/create', nil, nil, body) + end, + + list_configs = function (self, query) + return perform_request(self, 'GET', '/configs', query) + end, + + create_config = function (self, body) + return perform_request(self, 'POST', '/configs/create', nil, nil, body) + end, + + list_plugins = function (self, query) + return perform_request(self, 'GET', '/plugins', query) + end, + + get_plugin_privileges = function (self, query) + return perform_request(self, 'GET', '/plugins/privileges', query) + end, + + install_plugin = function (self, query, auth, body) + return perform_request(self, 'POST', '/plugins/pull', query, auth, body) + end, + + create_plugin = function (self, query, body) + return perform_request(self, 'POST', '/plugins/create', query, nil, body) + end, + + check_auth_config = function (self, body) + return perform_request(self, 'POST', '/auth', nil, nil, body) + end, + + get_system_info = function (self) + return perform_request(self, 'GET', '/info') + end, + + ping_server = function (self) + return perform_request(self, 'GET', '/_ping') + end, + + -- @todo missing endpoints: + -- monitor_events + + get_usage = function (self) + return perform_request(self, 'GET', '/system/df') + end, + } + + loop_through_entity_endpoints({ + ['list_container_processes'] = { method = 'GET', endpoint = 'top' }, + ['inspect_container'] = { method = 'GET', endpoint = 'json' }, + ['get_container_logs'] = { method = 'GET', endpoint = 'logs' }, + ['get_container_fs_changes'] = { method = 'GET', endpoint = 'changes' }, + ['resize_container_tty'] = { method = 'POST', endpoint = 'resize' }, + ['start_container'] = { method = 'POST', endpoint = 'start' }, + ['stop_container'] = { method = 'POST', endpoint = 'stop' }, + ['restart_container'] = { method = 'POST', endpoint = 'restart' }, + ['kill_container'] = { method = 'POST', endpoint = 'kill' }, + ['rename_container'] = { method = 'POST', endpoint = 'rename' }, + ['pause_container'] = { method = 'POST', endpoint = 'pause' }, + ['resume_container'] = { method = 'POST', endpoint = 'unpause' }, + ['wait_for_container'] = { method = 'POST', endpoint = 'wait' }, + ['remove_container'] = { method = 'DELETE' }, + ['get_container_resource_info'] = { method = 'HEAD', endpoint = 'archive' }, + ['get_container_resource_archive'] = { method = 'GET', endpoint = 'archive' }, + ['create_exec_instance'] = { method = 'POST', endpoint = 'exec' }, + }, 'containers', d) + + loop_through_entity_endpoints({ + ['inspect_image'] = { method = 'GET', endpoint = 'json' }, + ['get_image_history'] = { method = 'GET', endpoint = 'history' }, + ['push_image'] = { method = 'POST', endpoint = 'push' }, + ['tag_image'] = { method = 'POST', endpoint = 'tag' }, + ['remove_image'] = { method = 'DELETE' }, + }, 'images', d) + + loop_through_entity_endpoints({ + ['inspect_network'] = { method = 
'GET' }, + ['remove_network'] = { method = 'DELETE' }, + ['connect_container_to_network'] = { method = 'POST', endpoint = 'connect' }, + ['disconnect_container_from_network'] = { method = 'POST', endpoint = 'disconnect' }, + }, 'networks', d) + + loop_through_entity_endpoints({ + ['inspect_volume'] = { method = 'GET' }, + ['remove_volume'] = { method = 'DELETE' }, + }, 'volumes', d) + + loop_through_entity_endpoints({ + ['start_exec_instance'] = { method = 'POST', endpoint = 'start' }, + ['resize_exec_instance'] = { method = 'POST', endpoint = 'resize' }, + ['inspect_exec_instance'] = { method = 'GET', endpoint = 'json' }, + }, 'exec', d) + + loop_through_entity_endpoints({ + ['inspect_node'] = { method = 'GET' }, + ['delete_node'] = { method = 'DELETE' }, + ['update_node'] = { method = 'POST', endpoint = 'update' }, + }, 'nodes', d) + + loop_through_entity_endpoints({ + ['inspect_service'] = { method = 'GET' }, + ['delete_service'] = { method = 'DELETE' }, + ['update_service'] = { method = 'POST', endpoint = 'update' }, + ['get_service_logs'] = { method = 'GET', endpoint = 'logs' }, + }, 'services', d) + + loop_through_entity_endpoints({ + ['inspect_task'] = { method = 'GET' }, + }, 'tasks', d) + + loop_through_entity_endpoints({ + ['inspect_secret'] = { method = 'GET' }, + ['delete_secret'] = { method = 'DELETE' }, + ['update_secret'] = { method = 'POST', endpoint = 'update' }, + }, 'secrets', d) + + loop_through_entity_endpoints({ + ['inspect_config'] = { method = 'GET' }, + ['delete_config'] = { method = 'DELETE' }, + ['update_config'] = { method = 'POST', endpoint = 'update' }, + }, 'configs', d) + + loop_through_entity_endpoints({ + ['inspect_plugin'] = { method = 'GET', endpoint = 'json' }, + ['remove_plugin'] = { method = 'DELETE' }, + ['enable_plugin'] = { method = 'POST', endpoint = 'enable' }, + ['disable_plugin'] = { method = 'POST', endpoint = 'disable' }, + ['upgrade_plugin'] = { method = 'POST', endpoint = 'upgrade' }, + ['push_plugin'] = { method = 'POST', endpoint = 'push' }, + ['configure_plugin'] = { method = 'POST', endpoint = 'set' }, + }, 'plugins', d) + + loop_through_entity_endpoints({ + ['get_registry_image_info'] = { method = 'GET', endpoint = 'json' }, + }, 'distribution', d) + + return d + end +} diff --git a/source/tools/monitor/unity/httplib/httpBase.lua b/source/tools/monitor/unity/httplib/httpBase.lua index 7ddbc24e43b3c9aa16f0eff8ef7394103590b2af..3ec1ff4f4c09243a7b1965358e4816db35e307cf 100644 --- a/source/tools/monitor/unity/httplib/httpBase.lua +++ b/source/tools/monitor/unity/httplib/httpBase.lua @@ -5,6 +5,7 @@ --- require("common.class") +local system = require("common.system") local ChttpComm = require("httplib.httpComm") local ChttpBase = class("ChttpBase", ChttpComm) @@ -20,22 +21,44 @@ function ChttpBase:_install(frame) end end +function ChttpBase:_installRe(path, frame) + frame:registerRe(path, self) +end + function ChttpBase:echo(tRet, keep) error("ChttpBase:echo is a virtual function.") end local function checkKeep(tReq) local conn = tReq.header["connection"] - if conn and string.lower(conn) == "keep-alive" then - return true + if tReq.vers == "1.0" then + if conn and string.lower(conn) == "keep-alive" then + return true + else -- for http 1.0, close as default + return false + end + else + if conn and string.lower(conn) == "close" then + return false + else -- for http 1.1 and newer, for keep-alive as default + return true + end end - return false end function ChttpBase:call(tReq) + local keep = checkKeep(tReq) local tRet = 
self._urlCb[tReq.path](tReq) + local res = self:echo(tRet, keep) + + return res, keep +end + +function ChttpBase:calls(tReq) local keep = checkKeep(tReq) - return self:echo(tRet, keep), keep + local res = self:callRe(tReq, keep) + + return res, keep end return ChttpBase diff --git a/source/tools/monitor/unity/httplib/httpComm.lua b/source/tools/monitor/unity/httplib/httpComm.lua index 12b691f318d6f72b218f1be258387b8b909245bd..3fdff02b5ebb8c3e6c5a8fa3b63737c28c6852b3 100644 --- a/source/tools/monitor/unity/httplib/httpComm.lua +++ b/source/tools/monitor/unity/httplib/httpComm.lua @@ -87,12 +87,15 @@ function ChttpComm:packHeaders(headTable, len) -- just for http out. end local origin = originHeader() + local c = 0 for k, v in pairs(origin) do - table.insert(lines, k .. ": " .. v) + c = c + 1 + lines[c] = table.concat({k, v}, ": ") end for k, v in pairs(headTable) do - table.insert(lines, k .. ": " .. v) + c = c + 1 + lines[c] = table.concat({k, v}, ": ") end return pystring:join("\r\n", lines) .. "\r\n" end diff --git a/source/tools/monitor/unity/httplib/httpHtml.lua b/source/tools/monitor/unity/httplib/httpHtml.lua index 5205af6556a44472e8fee1d3873fc0b8f1d75eaa..91db3e8824d05686f379d7e9166c9eca2d342e97 100644 --- a/source/tools/monitor/unity/httplib/httpHtml.lua +++ b/source/tools/monitor/unity/httplib/httpHtml.lua @@ -5,13 +5,35 @@ --- require("common.class") +local unistd = require("posix.unistd") local pystring = require("common.pystring") +local system = require("common.system") local ChttpBase = require("httplib.httpBase") local ChttpHtml = class("ChttpHtml", ChttpBase) function ChttpHtml:_init_(frame) ChttpBase._init_(self) + + self._reCb = { + md = function(s, keep, suffix) return self:renderMd(s, keep, suffix) end, + html = function(s, keep, suffix) return self:renderHtml(s, keep, suffix) end, + jpg = function(s, keep, suffix) return self:renderImage(s, keep, "jpeg") end, + jpeg = function(s, keep, suffix) return self:renderImage(s, keep, suffix) end, + gif = function(s, keep, suffix) return self:renderImage(s, keep, suffix) end, + png = function(s, keep, suffix) return self:renderImage(s, keep, suffix) end, + bmp = function(s, keep, suffix) return self:renderImage(s, keep, suffix) end, + txt = function(s, keep, suffix) return self:renderText(s, keep, suffix) end, + text = function(s, keep, suffix) return self:renderText(s, keep, suffix) end, + log = function(s, keep, suffix) return self:renderText(s, keep, suffix) end, + } +end + +local function loadFile(fPpath) -- conform file already exist. + local f = io.open(fPpath,"r") + local s = f:read("*all") + f:close() + return s end function ChttpHtml:markdown(text) @@ -19,6 +41,43 @@ function ChttpHtml:markdown(text) return md:toHtml(text) end +function ChttpHtml:renderMd(s, keep, suffix) + local tmd = self:markdown(s) + local tRet = { + title = "markdown document render from beaver.", + content = tmd, + cType = "text/html", + } + return self:echo(tRet, keep) +end + +function ChttpHtml:renderHtml(s, keep, suffix) + local cType = "text/html" + return self:pack(cType, keep, s) +end + +function ChttpHtml:renderText(s, keep, suffix) + local cType = "text/plain" + return self:pack(cType, keep, s) +end + +function ChttpHtml:renderImage(s, keep, suffix) + local cType = "image/" .. suffix + return self:pack(cType, keep, s) +end + +function ChttpHtml:reSource(tReq, keep, head, srcPath) + local path = tReq.path + path = srcPath .. 
pystring:lstrip(path, head) + if unistd.access(path) then + local s = loadFile(path) + local _, suffix = unpack(pystring:rsplit(path, ".", 1)) + if system:keyIsIn(self._reCb, suffix) then + return self._reCb[suffix](s, keep, suffix) + end + end +end + local function htmlPack(title, content) local h1 = [[ @@ -41,16 +100,22 @@ local function htmlPack(title, content) return pystring:join("", bodies) end -function ChttpHtml:echo(tRet, keep) +function ChttpHtml:pack(cType, keep, body) local stat = self:packStat(200) local tHead = { - ["Content-Type"] = "text/html", + ["Content-Type"] = cType, ["Connection"] = (keep and "keep-alive") or "close" } - local body = htmlPack(tRet.title, tRet.content) local headers = self:packHeaders(tHead, #body) local tHttp = {stat, headers, body} return pystring:join("\r\n", tHttp) end +function ChttpHtml:echo(tRet, keep) + local cType = tRet.type or "text/html" + local body = htmlPack(tRet.title, tRet.content) + + return self:pack(cType, keep, body) +end + return ChttpHtml diff --git a/source/tools/monitor/unity/test/beaver/walkDir.lua b/source/tools/monitor/unity/test/beaver/walkDir.lua new file mode 100644 index 0000000000000000000000000000000000000000..27a0fd0d88b0692295af6a704abe56dac87891b7 --- /dev/null +++ b/source/tools/monitor/unity/test/beaver/walkDir.lua @@ -0,0 +1,43 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/2/18 12:13 PM +--- + +package.path = package.path .. ";../../?.lua;" +local dirent = require("posix.dirent") +local sysStat = require("posix.sys.stat") +local system = require("common.system") + +local function join(dir, fName) + local paths = {dir, fName} + return table.concat(paths, "/") +end + +local function walk(orig, dir, tbl) + local ls = dirent.dir(dir) + local len = string.len(orig) + for _, l in ipairs(ls) do + if l == ".." or l == '.' 
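-- Hypothetical page class (not part of this patch) showing how the new
-- suffix-based dispatch could be used: _installRe registers a path with the
-- beaver frame and reSource serves files from a local directory, rendering
-- .md through the markdown pipeline and images/text with their content types.
-- The pattern accepted by frame:registerRe and the directory layout are
-- assumptions for illustration.
require("common.class")
local ChttpHtml = require("httplib.httpHtml")

local CguidePage = class("guidePage", ChttpHtml)

function CguidePage:_init_(frame)
    ChttpHtml._init_(self, frame)
    self:_installRe("/guide", frame)
end

function CguidePage:callRe(tReq, keep)
    -- strip the /guide prefix from the request path, prepend ./guide/ and
    -- render the file according to its suffix; returns nil if it is missing
    return self:reSource(tReq, keep, "/guide", "./guide/")
end

return CguidePage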
then + goto continue + end + local path = join(dir, l) + local stat, err, errno = sysStat.stat(path) + if stat then + local mode = stat.st_mode + if sysStat.S_ISDIR(mode) > 0 then + walk(orig, path, tbl) + elseif sysStat.S_ISREG(mode) > 0 then + table.insert(tbl, string.sub(path, len + 2)) + end + else + system:posixError(string.format("bad access to file %s", path), err, errno) + end + ::continue:: + end +end + +local tbl = {} +local dir = "../../beaver/guide" +walk(dir, dir, tbl) +print(system:dump(tbl)) diff --git a/source/tools/monitor/unity/test/bees/reload.sh b/source/tools/monitor/unity/test/bees/reload.sh new file mode 100755 index 0000000000000000000000000000000000000000..472c8c65a02e477eb13561120ccf0be59c509b5f --- /dev/null +++ b/source/tools/monitor/unity/test/bees/reload.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +pkill -1 unity-mon \ No newline at end of file diff --git a/source/tools/monitor/unity/test/bees/run.sh b/source/tools/monitor/unity/test/bees/run.sh index 32a1732ae2bcf874cb6f6c50e93fc0054c63639f..37055941e5db172dcf2f65bcca4b52416404d7e1 100755 --- a/source/tools/monitor/unity/test/bees/run.sh +++ b/source/tools/monitor/unity/test/bees/run.sh @@ -1,17 +1,26 @@ #!/bin/bash +pkill unity-mon + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/ -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib/ -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../tsdb/native/ -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../collector/native/ -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../beaver/ +#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib/ +#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../tsdb/native/ +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib/ +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../collector/lib/ source /etc/profile -cd ../../beeQ -make -if [ $? -ne 0 ];then - echo " make -- Failed : "$? - exit 0 -fi +cd ../../beeQ || exit 1 +[ ! -d ../lib ] && mkdir ../lib +cp ./lib/*.so ../lib +cp ../tsdb/native/*.so ../lib +rm -rf ../bin +[ ! -d ../bin ] && mkdir ../bin +cp unity-mon ../bin + +cd ../bin || exit 1 + +yaml_path=$1 +[ ! 
$yaml_path ] && yaml_path="/etc/sysak/plugin.yaml" -./unity-mon +echo $yaml_yaml_path +./unity-mon $yaml_path & diff --git a/source/tools/monitor/unity/test/bees/stop.sh b/source/tools/monitor/unity/test/bees/stop.sh new file mode 100755 index 0000000000000000000000000000000000000000..f16bc5e994f4f06ae7f28fa95cb2e5e69f8ca288 --- /dev/null +++ b/source/tools/monitor/unity/test/bees/stop.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +pkill unity-mon \ No newline at end of file diff --git a/source/tools/monitor/unity/test/bpf/dStack.py b/source/tools/monitor/unity/test/bpf/dStack.py new file mode 100644 index 0000000000000000000000000000000000000000..0a7a82d4c0404d9ed301468cad5207d75dbb2fe9 --- /dev/null +++ b/source/tools/monitor/unity/test/bpf/dStack.py @@ -0,0 +1,55 @@ +import os +import sys +import re + +rePid = re.compile(r"^\d+$") + + +def checkState(path, pid): + fStatus = os.path.join(path, pid, "status") + try: + with open(fStatus, "r") as f: + for _, line in enumerate(f): + if line.startswith("State:"): + _, stats = line.split(":", 1) + stats, _ = stats.split("(", 1) + return stats.strip() + except Exception: + return "N" + + +def getCmd(path, pid): + fCmd = os.path.join(path, pid, "cmdline") + try: + with open(fCmd, 'r') as f: + return f.read() + except Exception: + return "unknown" + + +def getKstack(path, pid): + fCmd = os.path.join(path, pid, "stack") + try: + with open(fCmd, 'r') as f: + return f.read() + except Exception: + return "unknown" + + +def walk_pids(path, fil): + for pid in os.listdir(path): + if rePid.match(pid): + tPath = "/proc/%s/task" % pid + for tid in os.listdir(tPath): + if checkState(tPath, tid) == fil: + print("task %s, comm: %s, status: %s" % (pid, getCmd(tPath, pid), fil)) + print(getKstack(tPath, pid)) + + +if __name__ == "__main__": + if len(sys.argv) == 1: + fil = "D" + else: + fil = sys.argv[1] + walk_pids("/proc", fil) + pass diff --git a/source/tools/monitor/unity/test/bpf/hello.py b/source/tools/monitor/unity/test/bpf/hello.py new file mode 100644 index 0000000000000000000000000000000000000000..3e4cead684a579d441aa7d640132eac821d203d0 --- /dev/null +++ b/source/tools/monitor/unity/test/bpf/hello.py @@ -0,0 +1,53 @@ +import ctypes as ct +from pylcc.lbcBase import ClbcBase + +bpfPog = r""" +#include "lbc.h" +#define TASK_COMM_LEN 16 +struct data_t { + u32 c_pid; + u32 p_pid; + char c_comm[TASK_COMM_LEN]; + char p_comm[TASK_COMM_LEN]; +}; + +LBC_PERF_OUTPUT(e_out, struct data_t, 128); +SEC("kprobe/_do_fork") +int j_wake_up_new_task(struct pt_regs *ctx) +{ + struct task_struct* parent = (struct task_struct *)PT_REGS_PARM1(ctx); + struct data_t data = {}; + + data.c_pid = bpf_get_current_pid_tgid() >> 32; + bpf_get_current_comm(&data.c_comm, TASK_COMM_LEN); + data.p_pid = BPF_CORE_READ(parent, pid); + bpf_core_read(&data.p_comm[0], TASK_COMM_LEN, &parent->comm[0]); + + bpf_perf_event_output(ctx, &e_out, BPF_F_CURRENT_CPU, &data, sizeof(data)); + return 0; +} + +char _license[] SEC("license") = "GPL"; +""" + +class CeventOut(ClbcBase): + def __init__(self): + super(CeventOut, self).__init__("eventOut", bpf_str=bpfPog) + + def _cb(self, cpu, data, size): + e = self.getMap('e_out', data, size) + print("current pid:%d, comm:%s. 
wake_up_new_task pid: %d, comm: %s" % ( + e.c_pid, e.c_comm, e.p_pid, e.p_comm + )) + + def loop(self): + self.maps['e_out'].open_perf_buffer(self._cb) + try: + self.maps['e_out'].perf_buffer_poll() + except KeyboardInterrupt: + print("key interrupt.") + exit() + +if __name__ == "__main__": + e = CeventOut() + e.loop() \ No newline at end of file diff --git a/source/tools/monitor/unity/test/bpf/net_health.py b/source/tools/monitor/unity/test/bpf/net_health.py new file mode 100644 index 0000000000000000000000000000000000000000..ad10f46f174070c1d5c4099afe5fa1a0b04e2223 --- /dev/null +++ b/source/tools/monitor/unity/test/bpf/net_health.py @@ -0,0 +1,47 @@ +from pylcc import ClbcBase +import time + +bpfPog = r""" +#include "lbc.h" + +LBC_ARRAY(outCnt, int, u64, 2); +LBC_HIST10(netHist); + +static inline void addCnt(int k, u64 val) { + u64 *pv = bpf_map_lookup_elem(&outCnt, &k); + if (pv) { + __sync_fetch_and_add(pv, val); + } +} + +SEC("kprobe/tcp_validate_incoming") +int j_tcp_validate_incoming(struct pt_regs *ctx) { + struct tcp_sock *tp = (struct tcp_sock *)PT_REGS_PARM1(ctx); + u64 ts = BPF_CORE_READ(tp, srtt_us) >> 3; + u64 ms = ts / 1000; + if (ms > 0) { + addCnt(0, ms); + addCnt(1, 1); + hist10_push(&netHist, ms); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; +""" + + +class CnetHealth(ClbcBase): + def __init__(self): + super(CnetHealth, self).__init__("net_health_bpf", bpf_str=bpfPog) + + def loop(self): + while True: + time.sleep(20) + print(self.maps['outCnt'].get()) + print(self.maps['netHist'].get()) + + +if __name__ == "__main__": + e = CnetHealth() + e.loop() diff --git a/source/tools/monitor/unity/test/curl/beaver/beavers.lua b/source/tools/monitor/unity/test/curl/beaver/beavers.lua index 619c4334c48adbd18edb9f9b05638474233f5a4e..af2966739ccb3880ce0d34acdcaaf2c006c2b74c 100644 --- a/source/tools/monitor/unity/test/curl/beaver/beavers.lua +++ b/source/tools/monitor/unity/test/curl/beaver/beavers.lua @@ -86,13 +86,15 @@ function Cbeavers:poll() if err then print("socket select return " .. err) end + local c = 0 for _, read in pairs(reads) do if type(read) == "number" then break elseif read == self._server then local s = read:accept() print("accept " .. s:getfd()) - table.insert(self._ss, s) + c = c + 1 + self._ss[c] = s local co = coroutine.create(function(o, s) self._proc(o, s) end) self._cos[s:getfd()] = co coroutine.resume(co, self, s) diff --git a/source/tools/monitor/unity/test/curl/docker.lua b/source/tools/monitor/unity/test/curl/docker.lua new file mode 100644 index 0000000000000000000000000000000000000000..dd349539116031d558723b1f39ae6102709dc41b --- /dev/null +++ b/source/tools/monitor/unity/test/curl/docker.lua @@ -0,0 +1,13 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/10 12:01 AM +--- + +package.path = package.path .. ";../../?.lua;" + +local api = require("httplib.dockerApi") +local system = require("common.system") + +local d = api.new('localhost', '/mnt/host/var/run/docker.sock') +print(system:dump(d:list_containers({ all = 'true' }))) diff --git a/source/tools/monitor/unity/test/curl/pods.lua b/source/tools/monitor/unity/test/curl/pods.lua new file mode 100644 index 0000000000000000000000000000000000000000..17ffb373f010b921ccf5a58891732d90d5441aac --- /dev/null +++ b/source/tools/monitor/unity/test/curl/pods.lua @@ -0,0 +1,15 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. 
+--- DateTime: 2023/3/10 11:50 PM +--- + +package.path = package.path .. ";../../?.lua;" + +local ChttpCli = require("httplib.httpCli") +local system = require("common.system") + +local cli = ChttpCli.new() +local res = cli:get("http://127.0.0.1:10255/pods") +local obj = cli:jdecode(res.body) +print(system:dump(obj.items)) diff --git a/source/tools/monitor/unity/test/fox/query.py b/source/tools/monitor/unity/test/fox/query.py index cddb328bce4edea0210199ffc038fd08c3a1efcc..41d6e5567de86922e71c451451523f87a1cecc22 100644 --- a/source/tools/monitor/unity/test/fox/query.py +++ b/source/tools/monitor/unity/test/fox/query.py @@ -17,7 +17,7 @@ def q_table(): def q_by_table(): - post_test({"mode": "last", "time": "100m", "table": ["cpu_total", "cpus"]}) + post_test({"mode": "last", "time": "5m", "table": ["per_sirqs"]}) def q_by_date(): @@ -34,7 +34,7 @@ def q_by_date(): if __name__ == "__main__": - post_test({"mode": "last", "time": "4m"}) + # post_test({"mode": "last", "time": "4m"}) # q_table() - # q_by_table() + q_by_table() # q_by_date() diff --git a/source/tools/monitor/unity/test/host/hostIp.lua b/source/tools/monitor/unity/test/host/hostIp.lua index 1b0266a630244aefccb2d605c291163465482e31..84b4d4a0dc9fac64f8bcf7edd770bca1afbf0232 100644 --- a/source/tools/monitor/unity/test/host/hostIp.lua +++ b/source/tools/monitor/unity/test/host/hostIp.lua @@ -9,8 +9,8 @@ local socket = require("socket") local function getAdd(hostName) local _, resolved = socket.dns.toip(hostName) local listTab = {} - for _, v in pairs(resolved.ip) do - table.insert(listTab, v) + for i, v in pairs(resolved.ip) do + listTab[i] = v end return listTab end diff --git a/source/tools/monitor/unity/test/posix/copySo.lua b/source/tools/monitor/unity/test/posix/copySo.lua new file mode 100644 index 0000000000000000000000000000000000000000..042fcfbab7fddae0853454f046d31c61a9913d3a --- /dev/null +++ b/source/tools/monitor/unity/test/posix/copySo.lua @@ -0,0 +1,91 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/5 12:58 PM +--- + +package.path = package.path .. ";../../?.lua;" +local system = require("common.system") +local dirent = require("posix.dirent") +local unistd = require("posix.unistd") +local stat = require("posix.sys.stat") +local pystring = require("common.pystring") + +local srcPath = "src/" +local dstPath = "dst/" + +local function listSrc(path) + local res = {} + local files = dirent.files(path) + for f in files do + if string.find(f, "%.so") then + table.insert(res, f) + end + end + return res +end + +local function checkDst() + if unistd.access(dstPath) then + local pstat = stat.stat(dstPath) + if stat.S_ISDIR(pstat.st_mode) == 0 then + error(string.format("dst %s is no a dictionary", dstPath)) + end + else + print("mkdir " .. dstPath) + local _, s, errno = stat.mkdir(dstPath) + if errno then + error(string.format("mkdir %s failed ,report %s. %d", dstPath), s, errno) + end + end +end + +local function checkSo(fPath) + local fSrc = srcPath .. fPath + local fDst = dstPath .. fPath + + if unistd.access(fDst) then + local sSrc = stat.stat(fSrc) + local sDst = stat.stat(fDst) + + if sSrc.st_mtime > sDst.st_mtime then -- modified + return true + else + return false + end + else -- exit + return true + end +end + +local function copySo(fPath) + local fSrc = srcPath .. fPath + local fDst = dstPath .. 
fPath + + local sFile, err = io.open(fSrc,"rb") + if err then + error(string.format("open file %s report %s."), fSrc, err) + end + local stream = sFile:read("*a") + sFile:close() + + local dFile, err = io.open(fDst,"wb") + if err then + error(string.format("open file %s report %s."), fDst, err) + end + dFile:write(stream) + dFile:close() +end + +local function checkSos() + checkDst() + local so_s = listSrc(srcPath) + for _, so in ipairs(so_s) do + if checkSo(so) then + print("need copy " .. so) + copySo(so) + end + end +end + +checkSos() diff --git a/source/tools/monitor/unity/test/posix/nr_cpu.lua b/source/tools/monitor/unity/test/posix/nr_cpu.lua new file mode 100644 index 0000000000000000000000000000000000000000..682a64154ad0dde9ca2381badad62cc04adbd191 --- /dev/null +++ b/source/tools/monitor/unity/test/posix/nr_cpu.lua @@ -0,0 +1,9 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/8 12:48 PM +--- + +local unistd = require("posix.unistd") + +print(unistd.sysconf(84)) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/posix/src/libtest.so b/source/tools/monitor/unity/test/posix/src/libtest.so new file mode 100644 index 0000000000000000000000000000000000000000..2af1bb7882b6ba602b3c41ffc445b9b03fe6c30a --- /dev/null +++ b/source/tools/monitor/unity/test/posix/src/libtest.so @@ -0,0 +1 @@ +just a test for so. \ No newline at end of file diff --git a/source/tools/monitor/unity/test/posix/src/libtest.so.2 b/source/tools/monitor/unity/test/posix/src/libtest.so.2 new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/source/tools/monitor/unity/test/posix/src/libtest.txt b/source/tools/monitor/unity/test/posix/src/libtest.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/source/tools/monitor/unity/test/posix/t_time.lua b/source/tools/monitor/unity/test/posix/t_time.lua new file mode 100644 index 0000000000000000000000000000000000000000..a5d9c85d147e413df666e6bf63708ee6dc74ae6b --- /dev/null +++ b/source/tools/monitor/unity/test/posix/t_time.lua @@ -0,0 +1,37 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/4 11:59 PM +--- + +package.path = package.path .. ";../../?.lua;" +local system = require("common.system") +local ptime = require("posix.time") + +local function calcSleep(hope, now) + if hope.tv_nsec >= now.tv_nsec then + return {tv_sec = hope.tv_sec - now.tv_sec, + tv_nsec = hope.tv_nsec - now.tv_nsec} + else + return {tv_sec = hope.tv_sec - now.tv_sec - 1, + tv_nsec = 1e9 + hope.tv_nsec - now.tv_nsec} + end +end + +local unit = 5 +local tStart = ptime.clock_gettime(ptime.CLOCK_MONOTONIC) +while true do + local now = ptime.clock_gettime(ptime.CLOCK_MONOTONIC) + local hope = {tv_sec = tStart.tv_sec + unit, tv_nsec = tStart.tv_sec} + local diff = calcSleep(hope, now) + assert(diff.tv_sec >= 0) + print(system:dump(diff)) + local _, s, errno, _ = ptime.nanosleep(diff) + if errno then + print(string.format("new sleep stop. 
%d, %s", errno, s)) + break + end + tStart = hope +end + + diff --git a/source/tools/monitor/unity/test/posix/t_utsname.lua b/source/tools/monitor/unity/test/posix/t_utsname.lua new file mode 100644 index 0000000000000000000000000000000000000000..b4b809dfc9de9c1133077158597fa32f457e53f9 --- /dev/null +++ b/source/tools/monitor/unity/test/posix/t_utsname.lua @@ -0,0 +1,12 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/4 11:29 PM +--- + +package.path = package.path .. ";../../?.lua;" +local utsname = require("posix.sys.utsname") +local system = require("common.system") + +local uts = utsname.uname() +print(system:dump(uts)) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/string/mdEsc.lua b/source/tools/monitor/unity/test/string/mdEsc.lua new file mode 100644 index 0000000000000000000000000000000000000000..50eef0955e62faf06acd5504940057566b4fabe4 --- /dev/null +++ b/source/tools/monitor/unity/test/string/mdEsc.lua @@ -0,0 +1,40 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/2/28 11:00 PM +--- + +local reFmt = "[\\`%*_%{%}%[%]%(%)#%+%-%.!|]" +local reHtml = '[<>&"\t]' + +local htmlRep = { + ["<"] = function() return "<" end, + [">"] = function() return ">" end, + ["&"] = function() return "&" end, + ['"'] = function() return """ end, + ["\t"] = function() return " " end, +} + +local function escape(s) + return htmlRep[s]() +end + +local function pEscape(s) + return string.gsub(s, reHtml, function(s) return escape(s) end) +end + +local function escMd(s) + return "\\" .. s +end + +local function pEscMd(s) + return string.gsub(s, reFmt, function(s) return escMd(s) end) +end + +local s = "" +print(pEscape(s)) + +s = "\\`.*_{}[]()#+-.|!" 
+print(#s) +print(pEscMd(s)) + diff --git a/source/tools/monitor/unity/test/string/py.lua b/source/tools/monitor/unity/test/string/py.lua index 04cb3142b49953f13a015a394ec5b79189a7b16e..2c6e5606bb62c2e0972cf6f07a6725f65f3ef1ad 100644 --- a/source/tools/monitor/unity/test/string/py.lua +++ b/source/tools/monitor/unity/test/string/py.lua @@ -14,6 +14,15 @@ assert(#ret == 3) assert(ret[1] == "hello") assert(ret[2] == "lua") assert(ret[3] == "language") +ret = pystring:split("hello lua language lua language") +assert(#ret == 5 ) +assert(ret[1] == "hello") +assert(ret[2] == "lua") +assert(ret[3] == "language") +assert(ret[4] == "lua") +assert(ret[5] == "language") +ret = pystring:split("Node 0, zone DMA 1 0 0 1 2 1 1 0 1 1 3") +assert(#ret == 15) -- 自定符号分割 ret = pystring:split("hello*lua *language", "*") @@ -26,6 +35,9 @@ assert(ret[3] == "language") ret = pystring:rsplit("hello*lua *language", "*", 1) assert(#ret == 2) assert(ret[1] == "hello*lua ") +ret = pystring:rsplit("hello*lua *lua language", "lua", 1) +assert(#ret == 2) +assert(ret[1] == "hello*lua *") -- 多字符串分割 ret = pystring:split("hello*lua *language", "*l") @@ -34,6 +46,46 @@ assert(ret[1] == "hello") assert(ret[2] == "ua ") assert(ret[3] == "anguage") +-- partition +ret = pystring:partition("hello lua") +assert(#ret == 3) +assert(ret[1] == "hello") +assert(ret[2] == " ") +assert(ret[3] == "lua") +ret = pystring:partition("hello*lua", "*") +assert(#ret == 3) +assert(ret[1] == "hello") +assert(ret[2] == "*") +assert(ret[3] == "lua") +ret = pystring:partition("hello lua language") +assert(#ret == 3) +assert(ret[1] == "hello") +assert(ret[2] == " ") +assert(ret[3] == "lua language") +ret = pystring:partition("hello lua language", "lua") +assert(#ret == 3) +assert(ret[1] == "hello ") +assert(ret[2] == "lua") +assert(ret[3] == " language") +ret = pystring:partition("hello*lua") +assert(ret == nil) + +-- rpartition +ret = pystring:rpartition("hello lua language") +assert(ret[1] == "hello lua") +assert(ret[2] == " ") +assert(ret[3] == "language") +ret = pystring:rpartition("hello lua lua language", "lua") +assert(ret[1] == "hello lua ") +assert(ret[2] == "lua") +assert(ret[3] == " language") + +-- splitlines +ret = pystring:splitlines("hello\nlua\nlanguage") +assert(ret[1] == "hello") +assert(ret[2] == "lua") +assert(ret[3] == "language") + -- strip掉左右空格 assert(pystring:strip("\t hello world. 
\t\n") == "hello world.") @@ -63,3 +115,109 @@ assert(pystring:endswith("hello world", "world")) -- find assert(pystring:find("hello world.", "hello") == 1) +assert(pystring:find("hello world.", "hEllo") == -1) + +-- rfind +assert(pystring:rfind("hello world hello.", "hello") == 12) +assert(pystring:rfind("hello world hello.", "hEllo") == -1) + +-- count +assert(pystring:count("hello world hello.", "hello") == 2) +assert(pystring:count("hello world hello.", "hEllo") == 0) +assert(pystring:count("hello world hello.", " ") == 2) + +-- shift +assert(pystring:shift("abcd", 1) == "dabc") +assert(pystring:shift("abcd", -1) == "bcda") +assert(pystring:shift("abcd", -2) == "cdab") + +-- swapcase +assert(pystring:swapcase("Hello, World!") == "hELLO, wORLD!") + +-- capitalize +assert(pystring:capitalize("hello") == "Hello") +assert(pystring:capitalize("") == "") +assert(pystring:capitalize("H") == "H") + +-- title +assert(pystring:title("hello") == "Hello") +assert(pystring:title("") == "") +assert(pystring:title("hello world.") == "Hello World.") +assert(pystring:title("hello world.") == "Hello World.") + +-- capwords +assert(pystring:capwords("hello world.") == "Hello World.") +assert(pystring:capwords("hello world.\nhere you are.") == "Hello World.\nHere You Are.") + +-- islower +assert(pystring:islower("hello") == true) +assert(pystring:islower("Hello") == false) +assert(pystring:islower("hello world!") == true) + +-- isupper +assert(pystring:isupper("HELLO") == true) +assert(pystring:isupper("Hello") == false) +assert(pystring:isupper("Hello World") == false) +assert(pystring:isupper("HELLO WORLD!") == true) + +-- isdigit +assert(pystring:isdigit("1234") == true) +assert(pystring:isdigit("123a") == false) +assert(pystring:isdigit("123.45") == false) + +-- ishex +assert(pystring:ishex("1234") == true) +assert(pystring:ishex("123a") == true) +assert(pystring:ishex("abcdef") == true) +assert(pystring:ishex("00ABCDEF") == true) +assert(pystring:ishex("123FG") == false) +assert(pystring:ishex("123.45") == false) + +-- isalnum +assert(pystring:isalnum("1234") == true) +assert(pystring:isalnum("00ABCDEF") == true) +assert(pystring:isalnum("123FG") == true) +assert(pystring:isalnum("123.45") == false) +assert(pystring:isalnum("123 45") == false) + +-- istilte +assert(pystring:istilte("Aaa") == true) +assert(pystring:istilte("aaa") == false) +assert(pystring:istilte("Aaa0") == false) +assert(pystring:istilte("A") == true) + +-- isfloat +assert(pystring:isfloat("1234") == true) +assert(pystring:isfloat("00ABCDEF") == false) +assert(pystring:isfloat("123FG") == false) +assert(pystring:isfloat("123.45") == true) +assert(pystring:isfloat("123 45") == false) + +-- ljust +assert(pystring:ljust("1234", 5) == " 1234") +assert(pystring:ljust("1234", 3) == "1234") +assert(pystring:ljust("1234", 6, "*") == "**1234") + +-- rjust +assert(pystring:rjust("1234", 5) == "1234 ") +assert(pystring:rjust("1234", 3) == "1234") +assert(pystring:rjust("1234", 6, "*") == "1234**") + +-- center +assert(pystring:center("1234", 5) == "1234 ") +assert(pystring:center("1234", 7) == " 1234 ") +assert(pystring:center("1234", 8) == " 1234 ") +assert(pystring:center("1234", 8, "*") == "**1234**") + +-- zfill +assert(pystring:zfill("3.14", 6) == "003.14") + +-- replace +assert(pystring:replace("hello world.", "world", "lua") == "hello lua.") +assert(pystring:replace("hello world world.", "world", "lua") == "hello lua lua.") +assert(pystring:replace("hello world world.", "world", "lua", 1) == "hello lua world.") 
+assert(pystring:replace("hello %. %*.", "%.", "%*") == "hello %* %*.") +assert(pystring:replace("hello %. %*.", "%.", " ") == "hello %*.") + +-- expandtabs +assert(pystring:expandtabs("hello\tworld.") == "hello world.") diff --git a/source/tools/monitor/unity/test/unix/pyunix.py b/source/tools/monitor/unity/test/unix/pyunix.py new file mode 100644 index 0000000000000000000000000000000000000000..c215808a01506738909909d95a1c808eb63b8bbf --- /dev/null +++ b/source/tools/monitor/unity/test/unix/pyunix.py @@ -0,0 +1,28 @@ +import os +import time +import socket + +PIPE_PATH = "/tmp/sysom" +MAX_BUFF = 128 * 1024 + + +class CnfPut(object): + def __init__(self): + self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) + self._path = PIPE_PATH + if not os.path.exists(self._path): + raise ValueError("pipe path is not exist. please check Netinfo is running.") + + def puts(self, s): + if len(s) > MAX_BUFF: + raise ValueError("message len %d, is too long ,should less than%d" % (len(s), MAX_BUFF)) + return self._sock.sendto(s, self._path) + + +if __name__ == "__main__": + nf = CnfPut() + i = 10 + while True: + nf.puts('io_burst,disk=/dev/vda1 limit=10.0,max=%d,log="io log burst"' % i) + i += 1 + time.sleep(5) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/yaml/descr.yaml b/source/tools/monitor/unity/test/yaml/descr.yaml index ec4e3ec502bc2e81961cf1827256a08da35e55e0..619a4a086bd876197620fa495be377e9de38b634 100644 --- a/source/tools/monitor/unity/test/yaml/descr.yaml +++ b/source/tools/monitor/unity/test/yaml/descr.yaml @@ -11,4 +11,6 @@ metrics: index: [] field: [user, nice, sys, idle, iowait, hardirq, softirq, steal, guest, guestnice] help: "cpu usage info for per-cpu." - type: "gauge" \ No newline at end of file + type: "gauge" +config: + path: pods.yaml \ No newline at end of file diff --git a/source/tools/monitor/unity/test/yaml/pods.yaml b/source/tools/monitor/unity/test/yaml/pods.yaml new file mode 100644 index 0000000000000000000000000000000000000000..795e330b3cef517c82214f557debff72d66c0ccf --- /dev/null +++ b/source/tools/monitor/unity/test/yaml/pods.yaml @@ -0,0 +1,10 @@ +pods: + - hello + - wolrd + - pod2 + - pod1 + +confs: + - path: /sys/cgroup + depth: 3 + - path: \ No newline at end of file diff --git a/source/tools/monitor/unity/test/yaml/systemYaml.lua b/source/tools/monitor/unity/test/yaml/systemYaml.lua new file mode 100644 index 0000000000000000000000000000000000000000..3c7410b2ca9cbf58774cf80f74861c4f1bab8c24 --- /dev/null +++ b/source/tools/monitor/unity/test/yaml/systemYaml.lua @@ -0,0 +1,14 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/3/8 11:19 AM +--- + +package.path = package.path .. 
";../../?.lua;" + +local system = require("common.system") + +local res = system:parseYaml("descr.yaml") +local path = res.config.path +local pods = system:parseYaml(path) +print(system:dump(pods.pods)) diff --git a/source/tools/monitor/unity/tsdb/foxTSDB.lua b/source/tools/monitor/unity/tsdb/foxTSDB.lua index b808489bce6fa04e39e34a25f76575037489dd5a..536d13841705f3f6bcb4c51781f1c45622867f4d 100644 --- a/source/tools/monitor/unity/tsdb/foxTSDB.lua +++ b/source/tools/monitor/unity/tsdb/foxTSDB.lua @@ -14,11 +14,11 @@ local foxFFI = require("tsdb.native.foxffi") local CfoxTSDB = class("CfoxTSDB") -function CfoxTSDB:_init_() +function CfoxTSDB:_init_(fYaml) self.ffi = foxFFI.ffi self.cffi = foxFFI.cffi self._proto = CprotoData.new(nil) - self._qBudget = 200 + self:setupConf(fYaml) end function CfoxTSDB:_del_() @@ -28,6 +28,13 @@ function CfoxTSDB:_del_() self._man = nil end +function CfoxTSDB:setupConf(fYaml) + local conf = system:parseYaml(fYaml) + local dbConf = conf.db or {budget = 200, rotate=7} + self._qBudget = dbConf.budget or 200 + self._rotate = dbConf.rotate or 7 +end + function CfoxTSDB:get_us() return self.cffi.get_us() end @@ -129,7 +136,7 @@ function CfoxTSDB:rotateDb() local unistd = require("posix.unistd") local usec = self._man.now - local sec = 7 * 24 * 60 * 60 + local sec = self._rotate * 24 * 60 * 60 local foxTime = self:getDateFrom_us(usec - sec * 1e6) local level = foxTime.year * 10000 + foxTime.mon * 100 + foxTime.mday @@ -147,7 +154,6 @@ function CfoxTSDB:rotateDb() print("delete " .. "./" .. f) pcall(unistd.unlink, "./" .. f) end - --pcall(unistd.unlink, "../" .. f) end end end @@ -240,6 +246,7 @@ function CfoxTSDB:query(start, stop, ms) -- start stop should at the same mday self:curMove(start) -- moveto position + local lenMs = #ms for line in self:loadData(stop) do local time = line.time for _, v in ipairs(line.lines) do @@ -261,34 +268,51 @@ function CfoxTSDB:query(start, stop, ms) -- start stop should at the same mday end tCell.values = values - table.insert(ms, tCell) + lenMs = lenMs + 1 + ms[lenMs] = tCell end end return ms end -function CfoxTSDB:qlast(last, ms) - local now = self:get_us() - local date = self:getDateFrom_us(now) - local beg = now - last * 1e6; - - if self._man then -- has setup - if self.cffi.check_pman_date(self._man, date) == 1 then -- at the same day - return self:query(beg, now, ms) - else - self:_del_() -- destroy old manager - if self:_setupRead(now) ~= 0 then -- try to create new - return ms - else - return self:query(beg, now, ms) - end +function CfoxTSDB:_qlast(date, beg, stop, ms) + if not self._man then -- check _man is already installed. 
+        if self:_setupRead(beg) ~= 0 then -- try to create new
+            return ms
         end
+    end
+
+    if self.cffi.check_pman_date(self._man, date) == 1 then
+        return self:query(beg, stop, ms)
     else
-        if self:_setupRead(now) ~= 0 then -- try to create new
+        self:_del_()
+        if self:_setupRead(beg) ~= 0 then -- try to create new
             return ms
-        else
-            return self:query(beg, now, ms)
         end
+        return self:query(beg, stop, ms)
+    end
+end
+
+function CfoxTSDB:qlast(last, ms)
+    assert(last < 24 * 60 * 60)
+
+    local now = self:get_us()
+    local beg = now - last * 1e6
+
+    local dStart = self:getDateFrom_us(beg)
+    local dStop = self:getDateFrom_us(now)
+
+    if self.cffi.check_foxdate(dStart, dStop) ~= 0 then
+        self:_qlast(dStart, beg, now, ms)
+    else
+        dStop.hour, dStop.min, dStop.sec = 0, 0, 0
+        local beg1 = beg
+        local beg2 = self.cffi.make_stamp(dStop)
+        local now1 = beg2 - 1
+        local now2 = now
+
+        self:_qlast(dStart, beg1, now1, ms)
+        self:_qlast(dStop, beg2, now2, ms)
+    end
 end
@@ -304,6 +328,7 @@ function CfoxTSDB:qDay(start, stop, ms, tbls, budget)
     budget = budget or self._qBudget
     self:curMove(start)
     local inc = false
+    local lenMs = #ms
     for line in self:loadData(stop) do
         inc = false
         local time = line.time
@@ -336,7 +361,8 @@ function CfoxTSDB:qDay(start, stop, ms, tbls, budget)
             end
             tCell.logs = logs
-            table.insert(ms, tCell)
+            lenMs = lenMs + 1
+            ms[lenMs] = tCell
             inc = true
         end
     end
@@ -361,11 +387,13 @@ function CfoxTSDB:qDayTables(start, stop, tbls)
     end
     self:curMove(start)
+    local lenTbls = #tbls
     for line in self:loadData(stop) do
         for _, v in ipairs(line.lines) do
             local title = v.line
             if not system:valueIsIn(tbls, title) then
-                table.insert(tbls, title)
+                lenTbls = lenTbls + 1
+                tbls[lenTbls] = title
             end
         end
     end
@@ -384,6 +412,7 @@ function CfoxTSDB:qDate(dStart, dStop, tbls)
     if self.cffi.check_foxdate(dStart, dStop) ~= 0 then
         self:qDay(beg, now, ms, tbls)
     else
+        dStop.hour, dStop.min, dStop.sec = 0, 0, 0
         local beg1 = beg
         local beg2 = self.cffi.make_stamp(dStop)
         local now1 = beg2 - 1
@@ -412,6 +441,7 @@ function CfoxTSDB:qNow(sec, tbls)
     if self.cffi.check_foxdate(dStart, dStop) ~= 0 then
         self:qDay(beg, now, ms, tbls)
     else
+        dStop.hour, dStop.min, dStop.sec = 0, 0, 0
         local beg1 = beg
         local beg2 = self.cffi.make_stamp(dStop)
         local now1 = beg2 - 1
@@ -440,6 +470,7 @@ function CfoxTSDB:qTabelNow(sec)
     if self.cffi.check_foxdate(dStart, dStop) ~= 0 then
         self:qDayTables(beg, now, tbls)
     else
+        dStop.hour, dStop.min, dStop.sec = 0, 0, 0
         local beg1 = beg
         local beg2 = self.cffi.make_stamp(dStop)
         local now1 = beg2 - 1
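The refactored qlast, together with the dStop.hour/dStop.min/dStop.sec zeroing added to qDate, qNow and qTabelNow, follows one pattern: when the start and stop of a window fall on different fox-dates, the window is split at the stop day's midnight (make_stamp on the zeroed dStop) and each half is queried against its own day file; the new assert(last < 24 * 60 * 60) keeps qlast to at most one such split. The sketch below illustrates only that splitting step, using os.date/os.time stand-ins and hypothetical sameDay/splitByMidnight helpers; the real code works with cffi.check_foxdate and cffi.make_stamp on microsecond stamps.

```lua
-- Illustration of the day-boundary split shared by qlast/qDate/qNow/qTabelNow.
local function sameDay(usA, usB)
    local a = os.date("*t", math.floor(usA / 1e6))
    local b = os.date("*t", math.floor(usB / 1e6))
    return a.year == b.year and a.month == b.month and a.day == b.day
end

local function splitByMidnight(usBeg, usEnd)
    if sameDay(usBeg, usEnd) then
        return { { usBeg, usEnd } }        -- one sub-query, whole window on the same day
    end
    local d = os.date("*t", math.floor(usEnd / 1e6))
    d.hour, d.min, d.sec = 0, 0, 0         -- mirrors dStop.hour, dStop.min, dStop.sec = 0, 0, 0
    local boundary = os.time(d) * 1e6      -- mirrors cffi.make_stamp(dStop)
    return { { usBeg, boundary - 1 }, { boundary, usEnd } }
end
```

Each returned {beg, stop} pair is then handed to a single-day scan, which is what _qlast and qDay do in the patch above.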