diff --git a/init.sh b/init.sh
index 0d638117763a464e0c3d3c263c52ec3682d7015d..9d6776ecccd5f6fd0b9aa9c8f85b7df2768f704f 100644
--- a/init.sh
+++ b/init.sh
@@ -19,4 +19,5 @@ export CHECK_ROOT=${CUR_PATH}/package/common/check_root.sh
 if ! type module >/dev/null 2>&1;then
     echo "Install environment-modules"
     . $CHECK_ROOT && yum install -y environment-modules || apt install -y environment-modules
+    source /etc/profile
 fi
diff --git a/package/osu/1.0.0/install.sh b/package/osu/1.0.0/install.sh
new file mode 100644
index 0000000000000000000000000000000000000000..eb56895bc0de5345b0d24b2155500573f9940d45
--- /dev/null
+++ b/package/osu/1.0.0/install.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Build and install the OSU micro-benchmarks with the MPI compiler wrappers.
+# Usage: install.sh PREFIX
+# Reads DOWNLOAD_TOOL, JARVIS_TMP and JARVIS_DOWNLOAD from the environment.
+set -x
+set -e
+# Capture and validate the install prefix before anything is downloaded.
+osu_prefix="${1:?usage: install.sh PREFIX}"
+. ${DOWNLOAD_TOOL} -u https://github.com/forresti/osu-micro-benchmarks/archive/refs/heads/master.zip -f osu-micro-benchmarks.zip
+cd "${JARVIS_TMP}"
+# -o overwrites an earlier extraction; a plain unzip would prompt on re-install.
+unzip -o "${JARVIS_DOWNLOAD}/osu-micro-benchmarks.zip"
+cd osu-micro-benchmarks-master
+./configure --prefix="${osu_prefix}" CC=mpicc CXX=mpicxx
+make
+make install
diff --git a/src/installService.py b/src/installService.py
index 14be3d971a84b074d32f76982574e82c3cfa6a45..5ad7957823bbae01e198c9706660e7b6b1932461 100644
--- a/src/installService.py
+++ b/src/installService.py
@@ -89,10 +89,10 @@ class InstallService:
         return self.gen_compiler_dict("icc", ('2018', "2018.4"))
 
     def get_hmpi_version(self):
-        mpirun_path = self.get_cmd_output('which mpirun')[0]
-        hmpi_path = os.path.dirname(mpirun_path)
-        hmpi_path = os.path.dirname(hmpi_path)
-        libucg_path = os.path.join(hmpi_path, "hucx/lib")
+        ucx_path = self.get_cmd_output('which ucx_info')[0]
+        ucx_path = os.path.dirname(ucx_path)
+        ucx_path = os.path.dirname(ucx_path)
+        libucg_path = os.path.join(ucx_path, "lib")
         libucg_so_flag = "libucg.so."
         version = None
         for file_name in os.listdir(libucg_path):
diff --git a/templates/bcc-esm/bccesm.arm.cpu.config b/templates/bcc-esm/bccesm.arm.cpu.config
new file mode 100644
index 0000000000000000000000000000000000000000..c0dd5f50645657ab634eab63bb8d578a777a4172
--- /dev/null
+++ b/templates/bcc-esm/bccesm.arm.cpu.config
@@ -0,0 +1,117 @@
+[SERVER]
+11.11.11.11
+
+[DOWNLOAD]
+bcc-esm/4.0 http://forecast.bcccsm.ncc-cma.net/web/
+
+[DEPENDENCY]
+set -x
+set -e
+./jarvis -install kml/1.6.0/gcc any
+./jarvis -install openblas/0.3.18 any
+./jarvis -install bisheng/2.1.0 com
+module purge
+module use ./software/modulefiles
+module load bisheng/2.1.0
+export CC=`which clang` CXX=`which clang++` FC=`which flang`
+./jarvis -install hmpi/1.1.1 bisheng
+module load hmpi/1.1.1
+./jarvis -bench mpi
+./jarvis -install fftw/3.3.8 bisheng
+export CC=mpicc CXX=mpicxx FC=mpifort F77=mpifort
+./jarvis -install hdf5/1.10.1 clang+mpi
+./jarvis -install pnetcdf/1.11.2 clang+mpi
+./jarvis -install netcdf/4.7.0 clang+mpi
+unzip ${JARVIS_DOWNLOAD}/esm.zip
+
+[ENV]
+module purge
+module use ./software/moduledeps
+module use software/modulefiles
+module load bisheng/2.1.0
+module load hmpi/1.1.1
+module load openblas/0.3.18
+module load kml-gcc/1.6.0
+export LIBS_DIR=/workspace/public/software/libs
+export NETCDF_DIR=${NETCDF_PATH}
+export PNETCDF_DIR=${PNETCDF_PATH}
+export FFTW_DIR=${FFTW_PATH}
+export MPI_DIR=${HMPI_PATH}
+export USER_CC=mpicc
+export USER_FC=mpifort
+export CC=mpicc CXX=mpicxx FC=mpifort F77=mpifort
+export MODEL_PATH=$JARVIS_ROOT/esm
+export DATA_PATH=$JARVIS_ROOT/INIDATA/data
+export CSMDATA=$JARVIS_ROOT/INIDATA
+export WORK_PATH=$MODEL_PATH/T382L70p25
+export KML_LIB=${KML_GCC_PATH}/lib
+
+[APP]
+app_name = BCCESM
+build_dir = $WORK_PATH
+binary_dir =
+case_dir = $WORK_PATH
+
+[BUILD]
+rm -rf output/atm/atm
+rm -rf output/lnd/lnd
+rm -rf output/ocn/ocn
+rm -rf output/cpl/cpl
+rm -rf output/ice/ice
+csh -f build.csh 1
+
+[CLEAN]
+rm -rf OBJ
+
+[RUN]
+#run = ./build.csh 2 2>&1 | tee run.log
+run = ./build.csh 2
+binary =
+nodes = 1
+
+[BATCH]
+#! /bin/bash
+
+time1=$(date "+%Y-%m-%d");
+perfdir="perf-data/${time1}"
+# -p: create perf-data/ on first use and tolerate a second run on the same day
+mkdir -p "$perfdir"
+cp output/atm/atm.log.* $perfdir/atm.log
+cp output/atm/timing.0 $perfdir/timing.atm.log
+cp output/cpl/cpl.log.* $perfdir/cpl.log
+cp output/ice/ice.log.* $perfdir/ice.log
+cp output/lnd/lnd.log.* $perfdir/lnd.log
+cp output/lnd/timing.0 $perfdir/timing.lnd.log
+cp output/ocn/ocn.log.* $perfdir/ocn.log
+echo "All of the data has archived under $perfdir"
+
+# PRECT_GPCP(gpcp_vars) SST_HADISST TS_NCEP
+# NOTE(review): everything after this exit is unreachable - confirm the diagnostics below are meant to stay disabled
+exit 0
+outputdir="job10-12M"
+export MSS_testpath=/share/output/atm
+export WKDIR=$JARVIS_ROOT/diag-ncl/$outputdir/
+image_dir=$WKDIR/${outputdir}-obs
+export test_prefix=$outputdir
+cd $MSS_testpath
+echo "COMPUTE TEST CASE CLIMATOLOGY..."
+cur_var=""
+for n in {1..12}; do
+  if [ $n -lt 10 ]; then
+    cur_var="$cur_var historical.cam2.h0.2001-0$n.nc"
+  else
+    cur_var="$cur_var historical.cam2.h0.2001-$n.nc"
+  fi
+done
+echo "handing $cur_var"
+ncea -4 -O $cur_var ${test_prefix}_ANN_climo.nc
+ncatted -O -a yrs_averaged,global,c,c,2001 ${test_prefix}_ANN_climo.nc
+echo "Image generate..."
+cd $JARVIS_ROOT/diag-ncl
+./year_mean.csh
+mkdir $outputdir
+cd $outputdir
+cp $image_dir/set5_6/set5_ANN_TS_NCEP_obsc.gif ./
+cp $image_dir/set5_6/set5_ANN_SST_HADISST_obsc.gif ./
+cp $image_dir/set5_6/set5_ANN_PRECT_GPCP_obsc.gif ./
+cp $image_dir/set7/set7_ANN_ICEFRAC_HADISST_NP_obsc.gif ./
\ No newline at end of file
diff --git a/templates/bcc-esm/build.csh b/templates/bcc-esm/build.csh
new file mode 100644
index 0000000000000000000000000000000000000000..0f5e02d6fd8fe4a7fc8ccf74194d4082378c7aa2
--- /dev/null
+++ b/templates/bcc-esm/build.csh
@@ -0,0 +1,231 @@
+#! /bin/csh -f
+# Configure the BCC-ESM case, then build it (argument 1) or submit it (argument 2).
+# Usage: build.csh <1|2>
+#   1 - run esmf.setup.csh and every component *.setup.csh script
+#   2 - remove old logs under $EXEROOT and submit run.sh with dsub
+# Reads NETCDF_DIR, MPI_DIR, MODEL_PATH, WORK_PATH, CSMDATA and LOGNAME from the environment.
+
+# Fail early: $argv[1] is dereferenced further down, and without it csh would
+# abort half-way through, after directories and files have already been touched.
+if ($#argv < 1) then
+  echo "usage: build.csh <1|2>"
+  exit 1
+endif
+
+setenv NETCDF ${NETCDF_DIR}
+setenv MPI_ROOT ${MPI_DIR}
+
+
+limit datasize unlimited
+setenv XLSMPOPTS stack=860000000
+setenv OMP_STACKSIZE 3G
+
+setenv Atmosphere_Model CAM3
+setenv System_Time_Type day # month/day
+setenv Integration_Time 2
+setenv Runoff_clm_ann .true.
+setenv carbon .true.
+
+##-------------wangln For Multi CPL-------------------------------------
+echo -------------------------------------------------------------------------
+set PATH=/bin
+set PATH=($PATH /usr/bin )
+set PATH=($PATH /usr/sbin )
+set PATH=($PATH /usr/lpp/ssp/bin )
+set PATH=($PATH /usr/lpp/ssp/kerberos/bin)
+set PATH=($PATH /usr/lpp/LoadL/full/bin /usr/local/lib/grads /usr/sbin/acct)
+set PATH=($PATH /usr/java130/bin /u/weimin/soft /usr/local/bin)
+set PATH=($PATH /usr/dt/bin /usr/lpp/X11/lib /usr/bin/X11)
+set PATH=($PATH /usr/dt/bin /usr/X11R6/lib /usr/bin/X11)
+echo -------------------------------------------------------------------------
+echo -------------------------------------------------------------------------
+echo b1. Set case sensitive environment variables available to model setup scripts
+echo -------------------------------------------------------------------------
+setenv CASE historical # case name
+setenv GRID T382_gxp25
+setenv RUNTYPE startup # startup, continue, branch, hybrid
+setenv SETBLD auto # auto, true, false
+setenv BASEDATE 2001-01-01
+
+setenv INIFILEDATE 2001-01-01
+setenv OCNINIFILEDATE 20010101.000000
+
+setenv CASESTR "Datm CLM3 MOM SIS 0 Start" # short descriptive text string
+setenv CSMROOT $MODEL_PATH # root directory of source
+#############################################################################
+# allocate exe directory
+#############################################################################
+setenv EXEROOT $WORK_PATH/output
+
+#############################################################################
+setenv ARCROOT $WORK_PATH/RESULT/ARC # archive root directory
+setenv REFCASE $WORK_PATH/RESTART # Runtype=branch data case
+setenv REFDATE 0001-01-06 # Runtype=branch start date
+
+echo -------------------------------------------------------------------------
+echo b2. Select multi-processing and resolution specs
+echo The task and thread settings depend on the grid being used
+echo Use NTASK=1 and NTHRD=1 for data models
+echo -------------------------------------------------------------------------
+
+set MODELS = ( atm lnd ice ocn cpl ) # generic model names.
+set SETUPS = ( bccam3.0 bccavim3.0 sis mom cpl5.3 ) # setup script name
+set NTASKS=( 992 104 64 200 4 )
+set NTHRDS=( 4 4 4 1 4 )
+echo -------------------------------------------------------------------------
+echo c. The following environment variables can be set by the user but
+echo by default are derived from the environment variables above
+echo -------------------------------------------------------------------------
+
+setenv MSSNAME `echo $LOGNAME | tr '[a-z]' '[A-Z]'` # LOGNAME in caps
+
+setenv MSSDIR mss:$WORK_PATH/RESULT # MSS directory path name
+setenv MSSRPD 0 # MSS file retention period
+setenv MSSPWD $LOGNAME # MSS file write password
+
+setenv SCRIPTS $WORK_PATH # run scripts are here
+setenv TOOLS $MODEL_PATH/tools # some tools are here
+setenv LOGDIR $EXEROOT/RESULT # save stdout here
+setenv CSMCODE $MODEL_PATH/models # base dir for src code
+setenv CSMUTL $CSMCODE/utils # Util directory
+setenv CSMSHR $CSMCODE/csm_share # shared code dir
+setenv CSMBLD $CSMCODE/bld # makefiles are here
+setenv LID "`date +%y%m%d-%H%M%S`" # time-stamp/file-ID string
+
+setenv OBJROOT $WORK_PATH/OBJ # build code here
+setenv LIBROOT $MODEL_PATH/lib # Location of supplemental libraries
+setenv INCROOT $LIBROOT/include # Location of supplemental includes/modfiles
+
+setenv LFSINP $CSMDATA # LOCAL INPUTDATA FSROOT
+setenv LMSINP $WORK_PATH/INIDATA # LOCAL INPUTDATA MSROOT
+setenv LMSOUT $EXEROOT/RESULT # LOCAL OUTPUT MSROOT
+setenv MACINP dataproc.ucar.edu # REMOTE INPUT MACHINE
+setenv RFSINP /fs/cgd/ccsm/inputdata # REMOTE INPUTDATA FSROOT
+setenv RMSINP /CCSM/inputdata # REMOTE INPUTDATA MSROOT
+setenv MACOUT dataproc.ucar.edu # REMOTE OUTPUT MACHINE
+setenv RFSOUT /fc44/$LOGNAME/archive/$CASE # REMOTE OUTPUT FSROOT
+
+#--- logic to set BLDTYPE based on SETBLD above
+setenv BLDTYPE $SETBLD
+if ($SETBLD =~ auto*) then
+  setenv BLDTYPE true
+  if ($RUNTYPE == 'continue') setenv BLDTYPE false
+endif
+if ($BLDTYPE != 'true' && $BLDTYPE != 'false') then
+  echo "error in BLDTYPE: $BLDTYPE"
+  exit 1
+endif
+
+echo -------------------------------------------------------------------------
+echo d. Determine os/machine/site
+echo -------------------------------------------------------------------------
+
+setenv OS `uname -s` # operating system
+setenv ARCH AARCH64
+setenv MACH ifc
+setenv MACHKEY `hostname`
+setenv SITE NCC
+
+echo -------------------------------------------------------------------------
+echo e. Create ccsm_joe
+echo -------------------------------------------------------------------------
+
+setenv CSMJOE $SCRIPTS/ccsm_joe
+rm -f $CSMJOE
+$TOOLS/ccsm_checkenvs > $CSMJOE
+
+echo -------------------------------------------------------------------------
+echo f. Prepare $GRID component models for execution
+echo - create execution directories for atm,cpl,lnd,ice,ocn
+echo - invoke component model setup scripts found in $SCRIPTS
+echo -------------------------------------------------------------------------
+
+setenv ATM_GRID `echo $GRID | sed s/_.\*//`; setenv LND_GRID $ATM_GRID
+setenv OCN_GRID `echo $GRID | sed s/.\*_//`; setenv ICE_GRID $OCN_GRID
+
+#--- create working directories
+foreach DIR ( $EXEROOT $LIBROOT $INCROOT $OBJROOT $LOGDIR)
+  if !(-d $DIR) mkdir -p $DIR
+end
+#--- run machine dependent commands (i.e. modules on SGI).
+echo $TOOLS/modules.$OS.$MACH
+###if (-f $TOOLS/modules.$OS.$MACH) source $TOOLS/modules.$OS.$MACH || exit 1 ###if (-f $TOOLS/modules.$OS.$MACH) module load emacs null GNU.tools MASS netcdf
+#--- create env variables for use in components
+foreach n (1 2 3 4 5)
+  set model = $MODELS[$n]
+  setenv ${model}_dir $EXEROOT/$model; setenv ${model}_setup $SETUPS[$n]
+  setenv ${model}_in $model.stdin ; setenv ${model}_out $model.log.$LID
+  echo ${model}_in
+end
+#--- get restart files
+#$TOOLS/ccsm_getrestart
+echo -------------------------------------------------------------------------
+echo g. Build Earth System Modeling Framework http://www.esmf.ucar.edu
+echo -------------------------------------------------------------------------
+
+setenv EXEDIR $EXEROOT/esmf ; if !(-d $EXEDIR) mkdir -p $EXEDIR
+cd $EXEDIR
+echo `date` $EXEDIR/esmf.log.$LID | tee esmf.log.$LID
+if ( $argv[1] == 1 ) then
+  $SCRIPTS/esmf.setup.csh >>& esmf.log.$LID || exit 1
+endif
+
+echo -------------------------------------------------------------------------
+echo h. Execute component setup.csh scripts, build models
+echo -------------------------------------------------------------------------
+
+foreach n (1 2 3 4 5)
+#--- activate stdin/stdout redirect work-around ---
+#--- setup env variables for components and grids ---
+  setenv MODEL $MODELS[$n] ; setenv SETUP $SETUPS[$n]
+  setenv NTHRD $NTHRDS[$n] ; setenv NTASK $NTASKS[$n]
+  setenv OBJDIR $OBJROOT/$MODEL/$SETUP ; if !(-d $OBJDIR) mkdir -p $OBJDIR
+  setenv EXEDIR $EXEROOT/$MODEL ; if !(-d $EXEDIR) mkdir -p $EXEDIR
+  setenv THREAD FALSE ; if ($NTHRD > 1) setenv THREAD TRUE
+
+  set ntask = $NTASKS[$n]
+
+  cd $EXEDIR
+#xjx rm -f $MODEL.log.*
+  echo `date` $EXEDIR/$MODEL.log.$LID | tee $MODEL.log.$LID
+  echo $SCRIPTS/$SETUP.setup.csh
+if ( $argv[1] == 1 ) then
+  $SCRIPTS/$SETUP.setup.csh >>& $MODEL.log.$LID
+endif
+  if ($status != 0) then
+    echo ERROR: $SETUP.setup.csh failed, see $MODEL.log.$LID
+    echo ERROR: cat $cwd/$MODEL.log.$LID
+    exit 99
+  endif
+
+#--- create model directories and processor counts for each platform
+#--- ($EXEROOT/all for SGI, poe_sw.cmdfile for AIX, prun.cmdfile for OSF1)
+
+if ($n == 1) then
+rm -rf $EXEROOT/csm.conf
+set P1 = 0
+set P2 = 0
+endif
+set P2 = `expr $P1 + $NTASK - 1`
+echo "$P1-$P2 $EXEROOT/$MODEL/$MODEL" >> $EXEROOT/csm.conf
+set P1 = `expr $P2 + 1`
+
+end
+
+if ( $argv[1] == 2 ) then
+  cd $EXEROOT
+  rm -rf atm/historical.cam2* atm/atm.log.* cpl/cpl.log.* esmf/esmf.log.* ice/ice.log.* lnd/lnd.log.*
+  rm -rf ocn/ocn.log.* esm_* esm_err_*
+  echo -------------------------------------------------------------------------
+  echo j. Run the model, execute models simultaneously allocating CPUs
+  echo -------------------------------------------------------------------------
+  echo "`date` -- CSM EXECUTION BEGINS HERE"
+  setenv I_MPI_COMPATIBILITY 4
+  setenv KMP_AFFINITY compact
+  chmod +x run.sh
+  # Stop with a non-zero status when the submission itself fails.
+  dsub -s run.sh || exit 1
+  echo "`date` -- CSM JOB SUBMIT HAS FINISHED"
+  env | egrep '(MP_|LOADL|XLS|FPE|DSM|OMP|MPC)' # document above env vars
+endif
+exit 0
diff --git a/templates/bcc-esm/run.sh b/templates/bcc-esm/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6aa258db1c0e36f75b63a80e3eb12e43750c56a4
--- /dev/null
+++ b/templates/bcc-esm/run.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+#DSUB -n esm-29n
+#DSUB --job_type cosched
+#DSUB -N 29
+#DSUB -R "cpu=128;mem=256000"
+#DSUB -A root.default
+#DSUB -q root.default
+#DSUB -o esm_%J.log
+#DSUB -e esm_err_%J.log
+
+# Batch job: converts the scheduler allocation file into an Open MPI
+# hostfile, generates the per-rank launcher run_roce.sh and starts mpirun.
+# Reads CCSCHEDULER_ALLOC_FILE and EXEROOT from the environment.
+
+echo ----- print env vars -----
+# With an empty operand the allocation would be read from the job's stdin and
+# the hostfile would come out empty, so fail fast with a clear message.
+if [ -z "${CCSCHEDULER_ALLOC_FILE}" ]; then
+  echo "CCSCHEDULER_ALLOC_FILE is not set; cannot build the hostfile" >&2
+  exit 1
+fi
+echo " "
+ls -la "${CCSCHEDULER_ALLOC_FILE}"
+echo ------ cat ${CCSCHEDULER_ALLOC_FILE}
+cat "${CCSCHEDULER_ALLOC_FILE}"
+
+# mktemp creates the (empty) hostfile under an unpredictable name; the trap removes it on exit.
+HOSTFILE=$(mktemp /tmp/hostfile.XXXXXX) || exit 1
+export HOSTFILE
+tmpfs=$HOSTFILE.1
+trap 'rm -f "$HOSTFILE" "$tmpfs"' EXIT
+# Field 1 is written out as the host and field 2 as its slot count; lines
+# without a third field are skipped. The command prints the sum of the slots.
+ntask=$(sort "${CCSCHEDULER_ALLOC_FILE}" | awk -v fff="$HOSTFILE" '{}
+{
+  split($0, a, " ")
+  if (length(a[1]) >0 && length(a[3]) >0) {
+    print a[1]" slots="a[2] >> fff
+    total_task+=a[2]
+  }
+}END{print total_task}')
+# Re-slot the hosts: the first 28 get 96 slots and the last one 24
+# (28 * 96 + 24 = 2712 ranks). The patterns are anchored on "slots=" so a
+# "128" inside a host name or IP address is left untouched.
+head -n 28 "$HOSTFILE" > "$tmpfs"
+sed -i 's/slots=128$/slots=96/' "$tmpfs"
+tail -n 1 "$HOSTFILE" >> "$tmpfs"
+sed -i 's/slots=128$/slots=24/' "$tmpfs"
+mv "$tmpfs" "$HOSTFILE"
+echo "openmpi hostfile $HOSTFILE generated:"
+echo "-----------------------"
+cat "$HOSTFILE"
+echo "-----------------------"
+echo "Total tasks is $ntask"
+echo "mpirun -hostfile $HOSTFILE -n $ntask "
+# Replaces line 15 of the ocean namelist with the layout setting.
+sed -i "15c layout =20,10," ocn/input.nml
+#generate run_roce.sh
+cat <<\EOF > run_roce.sh
+#!/bin/bash
+# Per-rank launcher: selects the component executable from the MPI world rank.
+rank=$OMPI_COMM_WORLD_RANK
+
+# atm lnd ice ocn cpl
+NTASKS=( 2304 96 96 200 16 )
+NTHRDS=( 1 1 1 1 1 )
+
+# Exclusive upper rank bound of each component that follows atm.
+lnd_proc=$(( NTASKS[0] + NTASKS[1] ))
+ice_proc=$(( lnd_proc + NTASKS[2] ))
+ocn_proc=$(( ice_proc + NTASKS[3] ))
+cpl_proc=$(( ocn_proc + NTASKS[4] ))
+if [ $rank -lt ${NTASKS[0]} ]; then
+  echo '-----atm------'$rank
+  export OMP_NUM_THREADS=${NTHRDS[0]}
+  $EXEROOT/atm/atm
+elif [ $rank -lt $lnd_proc ]; then
+  echo '-----lnd------'$rank
+  export OMP_NUM_THREADS=${NTHRDS[1]}
+  $EXEROOT/lnd/lnd
+elif [ $rank -lt $ice_proc ]; then
+  echo '-----ice------'$rank
+  export OMP_NUM_THREADS=${NTHRDS[2]}
+  $EXEROOT/ice/ice
+elif [ $rank -lt $ocn_proc ]; then
+  echo '-----ocn------'$rank
+  export OMP_NUM_THREADS=${NTHRDS[3]}
+  $EXEROOT/ocn/ocn
+elif [ $rank -lt $cpl_proc ]; then
+  echo '-----cpl------'$rank
+  export OMP_NUM_THREADS=${NTHRDS[4]}
+  echo "thread distribution --${NTHRDS[0]}, ${NTHRDS[1]},${NTHRDS[2]}"
+  echo "rank distribution --$lnd_proc, $ice_proc,$ocn_proc"
+  $EXEROOT/cpl/cpl
+fi
+
+EOF
+chmod +x run_roce.sh
+date
+# -np must equal the sum of NTASKS in run_roce.sh (2304+96+96+200+16 = 2712).
+mpirun --allow-run-as-root --mca plm_rsh_agent /opt/batch/agent/tools/dstart -mca btl ^vader,tcp,openib,uct -mca coll ^ucx -x UCX_BUILTIN_ALLREDUCE_ALGORITHM=6 -x UCX_BUILTIN_ALLTOALLV_ALGORITHM=1 -hostfile "$HOSTFILE" -np 2712 -x LD_LIBRARY_PATH -x PATH -x OMP_WAIT_POLICY=ACTIVE --bind-to socket run_roce.sh
+rc=$?
+date
+# Propagate the mpirun status so a failed run is not reported as success.
+exit $rc