diff --git a/package/go/1.18/install.sh b/package/go/1.18/install.sh index c06227794b59c6d5f2fbb186b561efbd7765b834..f88e916e7c152daba97d1391a2f79a6ff496294a 100755 --- a/package/go/1.18/install.sh +++ b/package/go/1.18/install.sh @@ -5,9 +5,9 @@ set -e cd ${JARVIS_TMP} # check Arch if [ x$(arch) = xaarch64 ];then - file_name='go1.18.linux-arm64.tar.gz' + file_name='go1.18.linux-arm64' else - file_name='go1.18.linux-amd64.tar.gz' + file_name='go1.18.linux-amd64' fi -. ${DOWNLOAD_TOOL} -u https://go.dev/dl/${file_name} +. ${DOWNLOAD_TOOL} -u https://go.dev/dl/${file_name}.tar.gz tar -xzvf ${JARVIS_DOWNLOAD}/${file_name} -C $1 \ No newline at end of file diff --git a/package/lmbench/3.0/install.sh b/package/lmbench/3.0/install.sh new file mode 100644 index 0000000000000000000000000000000000000000..25dee88f59e32408f06d57936974a574f718e5ec --- /dev/null +++ b/package/lmbench/3.0/install.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -x +set -e +. $CHECK_ROOT && yum install libtirpc-devel +cd ${JARVIS_TMP} +if [ ! -d lmbench ]; then + git clone --depth=1 https://github.com/intel/lmbench.git +fi +cd lmbench +cd src +sed -i '61c COMPILE=$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -ltirpc -I/usr/include/tirpc' Makefile +sed -i -e '114,115s/-O /-O2 /' Makefile +sed -i -e '139s/-O /-O2 /' Makefile +sed -i '231,234s/^/#/' Makefile +make OS=openEuler +#注意虽然安装会报错,但是程序是可以正常运行的 +#本地时延测试(112ns) +#numactl -C 3 -m 0 ./lat_mem_rd -P 1 -W 5 -N 5 -t 1024M 1024 +#-C:指定CPU核心,-m:指定numa节点 -P:并发进程数 -W:预热时间 -N:循环次数 -t:是否对被测区域采用垃圾数据初始化 +# 1024M:测试内存的大小 1024:测试步幅 +# 跨die时延测试(127ns) +#numactl -C 3 -m 1 ./lat_mem_rd -P 1 -W 5 -N 5 -t 1024M 1024 +# 并行测试CPU时延() +#numactl -C 0-64 -m 0-1 ./lat_mem_rd -P 64 -W 5 -N 5 -t 1024M 1024 +#本地带宽测试 +#1core : taskset -c 0 ./bw_mem -P 1 -W 5 -N 5 48M rd +#1cluster:taskset -c 0-3 ./bw_mem -P 4 -W 5 -N 5 48M rd +#1 die:taskset -c 0-31 ./bw_mem -P 32 -W 5 -N 5 48M rd +#1 CPU: taskset -c 0-64 ./bw_mem -P 64 -W 5 -N 5 48M rd \ No newline at end of file diff --git a/src/analysisService.py b/src/analysisService.py index 52ddf727c81fc12dab52c388be2d07a3f7561482..38cf0cdff18653c70a406ff2409bacc0a89f5b29 100644 --- a/src/analysisService.py +++ b/src/analysisService.py @@ -66,7 +66,10 @@ class AnalysisService: self.jrun.batch_run() def job_run(self): - self.jrun.job_run() + self.jrun.job_run(1) + + def job2_run(self): + self.jrun.job_run(2) def clean(self): self.jbuild.clean() @@ -95,8 +98,12 @@ class AnalysisService: def update(self): self.jinstall.update() + def check_download_url(self): + self.jinstall.check_download_url() + def gen_def(self, image): self.jcontainer.gen_def(image) def gen_simucode(self): self.jloop.get_simulate_code() + \ No newline at end of file diff --git a/src/dataService.py b/src/dataService.py index ce7e90dbd7590db21467e50c34cc878d940d9bed..09e888db1e7c5b354af37fc49def68d07040f514 100644 --- a/src/dataService.py +++ b/src/dataService.py @@ -38,6 +38,7 @@ class DataService(object,metaclass=Singleton): batch_cmd = '' loop_cmd = '' job_cmd = '' + job2_cmd = '' #Other Info env_config_file = 'JARVIS_CONFIG' config_file = 'data.config' @@ -134,43 +135,51 @@ class DataService(object,metaclass=Singleton): second = split_list[1] return (first, second) + def data_integration(self, config_data): + DataService.avail_ips = config_data.get('[SERVER]','') + DataService.download_info = config_data.get('[DOWNLOAD]','') + DataService.dependency = config_data.get('[DEPENDENCY]','') + DataService.module_content = config_data.get('[ENV]','') + DataService.build_cmd = config_data.get('[BUILD]','') + DataService.clean_cmd = config_data.get('[CLEAN]','') + DataService.run_cmd = config_data.get('[RUN]','') + DataService.batch_cmd = config_data.get('[BATCH]','') + DataService.loop_cmd = config_data.get('[LOOP]','') + DataService.job_cmd = config_data.get('[JOB]','') + DataService.job2_cmd = config_data.get('[JOB2]','') + data = config_data.get('[APP]','') + perf_data = config_data.get('[PERF]','') + self.set_app_info(data) + self.set_perf_info(perf_data) + DataService.binary_file, DataService.binary_para = self.split_two_part(DataService.run_cmd['binary']) + def data_process(self): contents = self.get_data_config() rows = contents.split('\n') rowIndex = 0 - data = {} - perf_data = {} + handlers = { + '[SERVER]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1), + '[DOWNLOAD]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1), + '[DEPENDENCY]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1), + '[ENV]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1), + '[APP]': lambda rows, rowIndex: self.read_rows_kv(rows, rowIndex+1), + '[BUILD]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1, False), + '[CLEAN]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1), + '[RUN]': lambda rows, rowIndex: self.read_rows_kv(rows, rowIndex+1), + '[BATCH]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1), + '[LOOP]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1, False), + '[JOB]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1, False), + '[JOB2]': lambda rows, rowIndex: self.read_rows(rows, rowIndex+1, False), + '[PERF]': lambda rows, rowIndex: self.read_rows_kv(rows, rowIndex+1) + } + config_data = {} while rowIndex < len(rows): row = rows[rowIndex].strip() - if row == '[SERVER]': - rowIndex, DataService.avail_ips = self.read_rows(rows, rowIndex+1) - elif row == '[DOWNLOAD]': - rowIndex, DataService.download_info = self.read_rows(rows, rowIndex+1) - elif row == '[DEPENDENCY]': - rowIndex, DataService.dependency = self.read_rows(rows, rowIndex+1) - elif row == '[ENV]': - rowIndex, DataService.module_content = self.read_rows(rows, rowIndex+1) - elif row == '[APP]': - rowIndex, data = self.read_rows_kv(rows, rowIndex+1) - self.set_app_info(data) - elif row == '[BUILD]': - rowIndex, DataService.build_cmd = self.read_rows(rows, rowIndex+1, False) - elif row == '[CLEAN]': - rowIndex, DataService.clean_cmd = self.read_rows(rows, rowIndex+1) - elif row == '[RUN]': - rowIndex, DataService.run_cmd = self.read_rows_kv(rows, rowIndex+1) - elif row == '[BATCH]': - rowIndex, DataService.batch_cmd = self.read_rows(rows, rowIndex+1) - elif row == '[LOOP]': - rowIndex, DataService.loop_cmd = self.read_rows(rows, rowIndex+1, False) - elif row == '[JOB]': - rowIndex, DataService.job_cmd = self.read_rows(rows, rowIndex+1, False) - elif row == '[PERF]': - rowIndex, perf_data = self.read_rows_kv(rows, rowIndex+1) - self.set_perf_info(perf_data) + if row in handlers.keys(): + rowIndex, config_data[row] = handlers[row](rows, rowIndex) else: rowIndex += 1 - DataService.binary_file, DataService.binary_para = self.split_two_part(DataService.run_cmd['binary']) + self.data_integration(config_data) def get_clean_cmd(self): return f''' diff --git a/src/installService.py b/src/installService.py index ce2155649c13b4ad57023e68342e1c94242dc220..eb535bee68bcfdad8d24c42fbbc6da97738ca660 100644 --- a/src/installService.py +++ b/src/installService.py @@ -3,12 +3,14 @@ import os import sys import re +import fnmatch from enum import Enum from glob import glob from dataService import DataService from toolService import ToolService from executeService import ExecuteService +from jsonService import JSONService class SType(Enum): COMPILER = 1 @@ -26,12 +28,14 @@ class InstallService: self.FULL_VERSION='fullver' self.PACKAGE_PATH = os.path.join(self.ROOT, self.PACKAGE) self.SOFTWARE_PATH = os.path.join(self.ROOT, 'software') + self.INSTALL_INFO_PATH = os.path.join(self.SOFTWARE_PATH, "install.json") self.COMPILER_PATH = os.path.join(self.SOFTWARE_PATH, 'compiler') self.LIBS_PATH = os.path.join(self.SOFTWARE_PATH, 'libs') self.MODULE_DEPS_PATH = os.path.join(self.SOFTWARE_PATH, 'moduledeps') self.MODULE_FILES = os.path.join(self.SOFTWARE_PATH, 'modulefiles') self.MPI_PATH = os.path.join(self.SOFTWARE_PATH, 'mpi') self.UTILS_PATH = os.path.join(self.SOFTWARE_PATH, 'utils') + self.json = JSONService(self.INSTALL_INFO_PATH) def get_version_info(self, info, reg = r'(\d+)\.(\d+)\.(\d+)'): matched_group = re.search(reg ,info) @@ -306,20 +310,12 @@ setenv {sname.upper().replace('-','_')}_PATH {install_path} ''' return module_file_content - def get_installed_file_path(self, install_path): - return os.path.join(install_path, "installed") - def is_installed(self, install_path): - installed_file_path = self.get_installed_file_path(install_path) - if not os.path.exists(installed_file_path): - return False - if not self.tool.read_file(installed_file_path) == "1": - return False - return True - - def set_installed_status(self, install_path, flag="1"): - installed_file_path = self.get_installed_file_path(install_path) - self.tool.write_file(installed_file_path, flag) + #为了兼容老版本,只要安装路径下存在installed也算做已安装 + installed_file_path = os.path.join(install_path, "installed") + if self.tool.read_file(installed_file_path) == "1": + return True + return self.json.query_data(install_path) def gen_module_file(self, install_path, software_info, env_info): sname = software_info['sname'] @@ -329,11 +325,11 @@ setenv {sname.upper().replace('-','_')}_PATH {install_path} cfullversion = env_info[self.FULL_VERSION] module_file_content = self.get_module_file_content(install_path, sname, sversion) if not self.is_installed(install_path): - return + return '' # if install_path is empty, The module file should not generated. if len(os.listdir(install_path)) == 1: print('module file did not generated because no file generated under install path') - return + return '' if stype == SType.MPI: compiler_str = cname + cfullversion software_str = sname + sversion @@ -364,6 +360,10 @@ setenv {sname.upper().replace('-','_')}_PATH {install_path} module_file = os.path.join(module_path, sversion) self.tool.write_file(module_file, module_file_content) print(f"module file {module_file} successfully generated") + row = self.json.query_data(install_path) + row["module_path"] = module_file + self.json.update_data(install_path, row) + self.json.write_file() def install_package(self, abs_software_path, install_path, other_args): install_script = 'install.sh' @@ -389,11 +389,18 @@ chmod +x {install_script} result = self.exe.exec_raw(install_cmd) if result: print(f"install to {install_path} successful") - self.set_installed_status(install_path, "1") else: print("install failed") sys.exit() + def add_install_info(self, software_info, install_path): + software_dict = {} + software_dict['name'] = software_info['sname'] + software_dict['version'] = software_info['sversion'] + software_dict['module_path'] = '' + self.json.add_data(install_path, software_dict) + self.json.write_file() + def install(self, install_args): software_path = install_args[0] compiler_mpi_info = install_args[1] @@ -426,6 +433,8 @@ chmod +x {install_script} if not install_path: return # get install script self.install_package(abs_software_path, install_path, other_args) + # add install info + self.add_install_info(software_info, install_path) # gen module file self.gen_module_file( install_path, software_info, env_info) @@ -445,11 +454,11 @@ chmod +x {depend_file} def remove(self, software_info): self.tool.prt_content("UNINSTALL " + software_info) - file_list = [d for d in glob(self.SOFTWARE_PATH+'/**', recursive=True)] remove_list = [] - for file in file_list: - if software_info in file and os.path.isdir(file) and self.is_installed(file): - remove_list.append(file) + installed_dict = self.json.read_file() + for path, software_row in installed_dict.items(): + if software_info in software_row['name']: + remove_list.append((path, software_row)) lens = len(remove_list) if lens == 0: print("software not installed") @@ -457,7 +466,7 @@ chmod +x {depend_file} choice = 1 if lens > 1: for i in range(lens): - print(f"{i+1}: {remove_list[i]}") + print(f"{i+1}: {remove_list[i][1]}") self.tool.prt_content("") choice = input(f"find {lens} software, which one do you want to remove?\n") try: @@ -467,31 +476,40 @@ chmod +x {depend_file} return except: sys.exit("please enter a valid number!") - self.set_installed_status(remove_list[choice-1], "0") + self.json.delete_data(remove_list[choice-1][0]) + self.json.write_file() print("Successfully remove "+software_info) def list(self): self.tool.prt_content("Installed list".upper()) - file_list = [d for d in glob(self.SOFTWARE_PATH+'/**', recursive=True)] - installed_list = [] - for file in file_list: - if os.path.isdir(file) and self.is_installed(file): - installed_list.append(file) - for file in installed_list: - file = file.replace(self.SOFTWARE_PATH, 'software') - print(file) - + installed_list = self.json.read_file() + if len(installed_list) == 0: + print("no software installed.") + return + # 获取所有列名,除了module_path + headers = list(installed_list.values())[0].keys() + print_headers = list(headers)[:-1] + # 打印表头 + table_str = "" + for header in print_headers: + table_str += f"{header:<10}" + # 添加path打印 + table_str += " path" + # 分割线 + table_str += "\n" + "-" * (10 * (len(print_headers)+1)) + "\n" + # 打印每行数据 + for path, row in installed_list.items(): + for key in print_headers: + table_str += f"{row[key]:<10} " + table_str += f"{path:<10} \n" + print(table_str) + def find(self, content): self.tool.prt_content(f"Looking for package {content}") - file_list = [d for d in glob(self.SOFTWARE_PATH+'/**', recursive=True)] - flag = False - for file in file_list: - if os.path.isdir(file) and self.is_installed(file): - if content in file: - flag = True - print(f"FOUND: {file}") - if not flag: - print("NOT FOUND") + installed_list = list(self.json.read_file().values()) + for row in installed_list: + if content in row['name']: + print(row) # update path when hpcrunner is translocation def update(self): @@ -508,5 +526,29 @@ chmod +x {depend_file} if search_old_path: content = content.replace(search_old_path.group(1), self.ROOT) self.tool.write_file(file, content) + #还要更新install list + install_info = self.tool.read_file(self.INSTALL_INFO_PATH) + search_old_path = re.search(r'(\/.*hpcrunner(-master)?)', install_info) + if search_old_path: + content = install_info.replace(search_old_path.group(1), self.ROOT) + self.tool.write_file(self.INSTALL_INFO_PATH, content) print("update successfully.") + def check_download_url(self): + # 查找指定目录下所有名字叫做install.sh的文件,将文件路径保存到列表中 + matches = [] + for root, dirnames, filenames in os.walk(self.PACKAGE_PATH): + for filename in fnmatch.filter(filenames, 'install.sh'): + matches.append(os.path.join(root, filename)) + # 定义匹配下载链接的正则表达式 + url_regex = r'(https?://\S+\.[zip|rar|tar|gz|bz|git]{2,3})' + for script in matches: + script_content = self.tool.read_file(script) + urls = re.findall(url_regex, script_content) + print(f"checking script {script}") + for url in urls: + if self.tool.check_url_isvalid(url): + print(f"url {url} successfully checked") + else: + print(f"url {url} check failed,please update") + print("all of the urls has been checked.") diff --git a/src/jarvis.py b/src/jarvis.py index ed2b7fd35ffb67530546da44b8038b93745d4007..24404709e0dd16eddaf883b4e7b632a6f13de24e 100644 --- a/src/jarvis.py +++ b/src/jarvis.py @@ -29,6 +29,7 @@ class Jarvis: parser.add_argument("-cls","--clean", help=f"clean {DataService.app_name}", action="store_true") parser.add_argument("-r","--run", help=f"run {DataService.app_name}", action="store_true") parser.add_argument("-j","--job", help=f"run job {DataService.app_name}", action="store_true") + parser.add_argument("-j2","--job2", help=f"run job 2 {DataService.app_name}", action="store_true") parser.add_argument("-p","--perf", help=f"auto perf {DataService.app_name}", action="store_true") parser.add_argument("-kp","--kperf", help=f"auto kperf {DataService.app_name}", action="store_true") # GPU perf @@ -56,6 +57,8 @@ class Jarvis: parser.add_argument("-R","--roce", help="start roce run...", nargs=2) # update modulefile path when hpcrunner is moved parser.add_argument("-u","--update", help="start update jarvis...", action="store_true") + # check download url is good or not + parser.add_argument("-check","--check", help="start check jarvis download url...", action="store_true") self.args = parser.parse_args() def main(self): @@ -95,6 +98,9 @@ class Jarvis: if self.args.job: self.analysis.job_run() + if self.args.job2: + self.analysis.job2_run() + if self.args.run: self.analysis.run() @@ -143,5 +149,8 @@ class Jarvis: if self.args.loop: self.analysis.gen_simucode() + if self.args.check: + self.analysis.check_download_url() + if __name__ == '__main__': Jarvis().main() diff --git a/src/jsonService.py b/src/jsonService.py new file mode 100644 index 0000000000000000000000000000000000000000..24835c0a83882be57fd7d12f4e7a3968179b9e7e --- /dev/null +++ b/src/jsonService.py @@ -0,0 +1,49 @@ +import json +import os + +class JSONService: + def __init__(self, filename): + self.filename = filename + self.data = self.read_file() + + # 读取 JSON 文件 + def read_file(self): + if not os.path.exists(self.filename): + with open(self.filename, 'w') as f: + f.write('{}') + with open(self.filename, "r") as file: + data = json.load(file) + return data + + # 写入 JSON 文件 + def write_file(self): + with open(self.filename, "w") as file: + json.dump(self.data, file, indent=4) + + # 查询数据 + def query_data(self, key): + if key in self.data: + return self.data[key] + else: + return None + + # 添加数据 + def add_data(self, key, value): + self.data[key] = value + + # 删除数据 + def delete_data(self, key): + if key in self.data: + del self.data[key] + else: + print("Key not found") + + # 修改数据 + def update_data(self, key, value): + if key in self.data: + self.data[key] = value + else: + print("Key not found") + + def json_transform(self, dict): + return json.dumps(dict) \ No newline at end of file diff --git a/src/runService.py b/src/runService.py index 9e730f6dc94c7bd881ad24c05bd6a33dd1030289..e58209e8697f4fa1c17a03e233e61f864416b00d 100644 --- a/src/runService.py +++ b/src/runService.py @@ -49,20 +49,23 @@ chmod +x {batch_file} ''' self.exe.exec_raw(run_cmd) - def job_run(self): + def job_run(self,num): job_file = 'job_run.sh' job_file_path = os.path.join(self.ROOT, job_file) print(f"start job run {DataService.app_name}") + job_cmd = DataService.job_cmd if num == 1 else DataService.job2_cmd job_content = f''' {self.hpc_data.get_env()} cd {DataService.case_dir} cat > run.sh << \EOF -{DataService.job_cmd} +{job_cmd} EOF chmod +x run.sh if type djob >/dev/null 2>&1;then dsub -s run.sh +elif type sbatch >/dev/null 2>&1;then + sbatch run.sh else echo "dsub not exists." fi diff --git a/src/toolService.py b/src/toolService.py index 1745b965bf1b745498f185d288b2d2173e9be276..5d5db5f7297c70440c12f86e4c706a24c467fb83 100644 --- a/src/toolService.py +++ b/src/toolService.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import time import os +import requests class ToolService: def __init__(self): @@ -19,8 +20,11 @@ class ToolService: def read_file(self, filename): content = '' - with open(filename, encoding='utf-8') as f: - content = f.read().strip() + try: + with open(filename, encoding='utf-8') as f: + content = f.read().strip() + except IOError: + return content return content def write_file(self, filename, content=""): @@ -34,3 +38,13 @@ class ToolService: def mkfile(self, path, content=''): if not os.path.exists(path): self.write_file(path, content) + + def check_url_isvalid(self,url): + try: + response = requests.get(url, stream=True) + if response.status_code == 200: + return True + else: + return False + except requests.exceptions.RequestException as e: + return False \ No newline at end of file diff --git a/templates/OSU/5.3/osu.arm.config b/templates/OSU/5.3/osu.arm.config new file mode 100644 index 0000000000000000000000000000000000000000..68e4561eac5f7b38999698aba0c2e77215fc2a03 --- /dev/null +++ b/templates/OSU/5.3/osu.arm.config @@ -0,0 +1,84 @@ +[SERVER] +11.11.11.11 + +[DOWNLOAD] + +[DEPENDENCY] +set -e +set -x +./jarvis -install bisheng/2.5.0 com +module purge +module use ./software/modulefiles +module load bisheng/2.5.0 +export CC=`which clang` +export CXX=`which clang++` +export FC=`which flang` +./jarvis -install hmpi/1.2.0 clang +module load hmpi/1.2.0 +#test if mpi is normal +./jarvis -bench mpi +./jarvis -install osu/1.0.0 clang+mpi +mkdir -p osu + +[ENV] +module purge +module use ./software/modulefiles +module load bisheng/2.5.0 +module load hmpi/1.2.0 +module load osu/1.0.0 + +[APP] +app_name = OSU +build_dir = ${OSU_PATH}/ +binary_dir = ${OSU_PATH}/libexec/osu-micro-benchmarks/mpi/collective +case_dir = ./osu + +[BUILD] + +[CLEAN] +make clean + +[RUN] +run = mpirun -np 8 +binary = osu_alltoallv +nodes = 1 + +[JOB] +#!/bin/sh +#DSUB -n osu_batch +#DSUB --job_type cosched:hmpi +#DSUB -A root.default +#DSUB -q root.default +#DSUB -N 12 +#DSUB -R cpu=128 +#DSUB -oo osu.%J.out +#DSUB -eo osu.%J.err + +##set runtime environment variables + +ulimit -s unlimited +ulimit -c unlimited +echo "========HOST FILE======" +cat $CCS_HOST_FILE +echo "========HOST FILE======" +date +> result.txt +for i in {1..8} +do + echo "ALLREDUCE Algorithm: $i" + mpirun -N 1 -x UCX_BUILTIN_ALLREDUCE_ALGORITHM=$i $CCS_MPI_OPTIONS $OSU_PATH/libexec/osu-micro-benchmarks/mpi/collective/osu_allreduce >> result.txt +done +date + +[JOB2] +#!/bin/sh +#DSUB -n osu_single +#DSUB --job_type cosched:hmpi +#DSUB -A root.default +#DSUB -q root.default +#DSUB -N 12 +#DSUB -R cpu=128 +#DSUB -oo osu.%J.out +#DSUB -eo osu.%J.err + +mpirun -np 12 $CCS_MPI_OPTIONS $OSU_PATH/libexec/osu-micro-benchmarks/mpi/collective/osu_allreduce \ No newline at end of file