Started by timer Running as SYSTEM Building in workspace /var/lib/jenkins/jobs/cuda_bw/workspace [SSH] script: PATH="/sw/tools/bin:/sw/tools/bin:$PATH" NODELIST="""" mkdir -p ~svchydrojenkins/hydro/cuda cd ~svchydrojenkins/hydro/cuda pwd rm -f * #export PATH=$PATH:/usr/local/cuda/bin # new hydro gpu sw stack testing #module use /hysw/spack/hydrogpu-2022-06/modules/lmod/Core module list which nvcc export CUDASAMPLES=~svchydrojenkins/hydro_sw_testing/gpu/cuda/cuda-samples cp $CUDASAMPLES/Samples/0_Introduction/matrixMul/matrixMul.cu ./ cp $CUDASAMPLES/Samples/1_Utilities/bandwidthTest/bandwidthTest.cu ./ nvcc -I $CUDASAMPLES/Common -o matrixMul matrixMul.cu if [[ -x "matrixMul" ]] then echo "matrixMul appears to have built ok" else echo "#### matrixMul is missing or not built ###############" exit -1 fi nvcc -I $CUDASAMPLES/Common -o bandwidthTest bandwidthTest.cu pwd ls -lt # if the NODELIST parameter was filled in, proceed with a targeted test, 1 cores , graph inputs in custom.txt if [[ ! -z ${NODELIST} ]] then rm -f stream_*.txt echo "YVALUE=1" > stream_triad.txt echo "YVALUE=1" > stream_gpu56c_triad.txt echo "YVALUE=1" > stream_gpu64c_triad.txt # for a single node string, it's easy enough to calculate a matching partition: PARTITION=`sinfo --format="%R,%N" -n ${NODELIST} | grep ${NODELIST} | cut -d',' -f1 | tail -1` echo "PARTITION=${PARTITION}" srun --partition=${PARTITION} --time=00:10:00 --ntasks=1 --gpus-per-node=1 --nodelist=${NODELIST} nsys profile --gpu-metrics-device=all ./bandwidthTest --htod srun --partition=${PARTITION} --time=00:10:00 --ntasks=1 --gpus-per-node=1 --nodelist=${NODELIST} ./matrixMul dcgmi profile --pause srun --partition=${PARTITION} --time=00:10:00 --ntasks=1 --gpus-per-node=1 --nodelist=${NODELIST} ncu --metrics "regex:.*" --target-processes all ./matrixMul dcgmi profile --resume GFLOPS=`srun --partition=${PARTITION} --time=00:10:00 --ntasks=1 --gpus-per-node=1 --nodelist=${NODELIST} ./matrixMul | \ grep Performance= | cut -d"=" -f2 | cut -d"G" -f1` echo GFLOPS=$GFLOPS echo "YVALUE=$GFLOPS" > gflops.txt BW=`srun --partition=${PARTITION} --time=00:10:00 --ntasks=1 --gpus-per-node=1 --nodelist=${NODELIST} \ ./bandwidthTest --htod | grep 32000000` BW=`echo BW=$BW | cut -d' ' -f3` echo BW=$BW echo "YVALUE=$BW" > bw.txt nvcc --version > mvcc_version.txt exit fi # otherwise, if NODELIST is "" : PARTITION=a100 srun --partition=${PARTITION} --time=00:10:00 --ntasks=1 --gpus-per-node=1 ./bandwidthTest --htod if [ $? -eq 0 ] then echo "slurm and srun ran successfully" else echo "srun failed" exit -1 fi srun --partition=${PARTITION} --time=00:10:00 --ntasks=1 --gpus-per-node=1 ./matrixMul GFLOPS=`srun --partition=${PARTITION} --time=00:10:00 --ntasks=1 --gpus-per-node=1 ./matrixMul | \ grep Performance= | cut -d"=" -f2 | cut -d"G" -f1` echo GFLOPS=$GFLOPS echo "YVALUE=$GFLOPS" > gflops.txt BW=`srun --partition=${PARTITION} --time=00:10:00 --ntasks=1 --gpus-per-node=1 \ ./bandwidthTest --htod | grep 32000000` BW=`echo BW=$BW | cut -d' ' -f3` echo BW=$BW echo "YVALUE=$BW" > bw.txt nvcc --version > nvcc_version.txt exit [SSH] executing... [SSH] Exception:Auth fail for methods 'publickey' com.jcraft.jsch.JSchException: Auth fail for methods 'publickey' at PluginClassLoader for jsch//com.jcraft.jsch.Session.connect(Session.java:521) at PluginClassLoader for ssh//org.jvnet.hudson.plugins.CredentialsSSHSite.createSession(CredentialsSSHSite.java:132) at PluginClassLoader for ssh//org.jvnet.hudson.plugins.CredentialsSSHSite.executeCommand(CredentialsSSHSite.java:208) at PluginClassLoader for ssh//org.jvnet.hudson.plugins.SSHBuilder.perform(SSHBuilder.java:104) at hudson.tasks.BuildStepMonitor$1.perform(BuildStepMonitor.java:20) at hudson.model.AbstractBuild$AbstractBuildExecution.perform(AbstractBuild.java:818) at hudson.model.Build$BuildExecution.build(Build.java:199) at hudson.model.Build$BuildExecution.doRun(Build.java:164) at hudson.model.AbstractBuild$AbstractBuildExecution.run(AbstractBuild.java:526) at hudson.model.Run.execute(Run.java:1894) at hudson.model.FreeStyleBuild.run(FreeStyleBuild.java:44) at hudson.model.ResourceController.execute(ResourceController.java:101) at hudson.model.Executor.run(Executor.java:446) Build step 'Execute shell script on remote host using ssh' marked build as failure Recording plot data Saving plot series data from: /var/lib/jenkins/jobs/cuda_bw/workspace/gflops.txt Saving plot series data from: /var/lib/jenkins/jobs/cuda_bw/workspace/bw.txt Sending e-mails to: [email protected] Finished: FAILURE