Skip to content
Failed

Console Output

Started by timer
Running as SYSTEM
Building in workspace /var/lib/jenkins/jobs/pytorch_train/workspace
[SSH] script:
# Node to pin the job to; an empty value means no node is pinned and the
# partition/nodelist logic further down falls back to its default.
TARGETNODE=""

# Toolchain modules needed by the training run.
module load anaconda3_gpu/4.13.0
module load cuda/11.7.0

# Every path used after this point is relative to the job directory, so stop
# here instead of running (and deleting files) in the wrong place.
cd pytorch_train || { printf 'error: cannot cd to pytorch_train\n' >&2; exit 1; }
# Clear any stale results file before the new run.
rm -f train_results_jenkins.csv

# Slurm Arguments
# Collected in one string, every option followed by a single space; the string
# is expanded unquoted on the srun command line so it splits back into words.
sargs="--nodes=1 "
sargs+="--ntasks-per-node=1 "
sargs+="--mem=16g "
sargs+="--time=00:10:00 "
sargs+="--account=bbmb-hydro "
sargs+="--gpus-per-node=1 "
sargs+="--gpu-bind=closest "
# Add Target node if it exists
if [[ -n "${TARGETNODE:-}" ]]
then
    # Ask Slurm which partition holds the requested node. The lookup uses
    # TARGETNODE itself so that the partition always matches --nodelist.
    # --noheader drops the column title, %R prints the bare partition name.
    PARTITION=$(sinfo --noheader --format="%R" --nodes="${TARGETNODE}" | tail -n 1)
    if [[ -z "${PARTITION}" ]]
    then
        printf 'error: no partition found for node %s\n' "${TARGETNODE}" >&2
        exit 1
    fi
    sargs+="--partition=${PARTITION} "
    sargs+="--nodelist=${TARGETNODE} "
else
    # No node requested: use the a100 partition.
    sargs+="--partition=a100 "
fi
# Executable to run. Held as an array so each word reaches srun as its own
# argument. The pipe to tee is written out on the srun line below: a "|" stored
# inside a variable is never treated as a pipe, it is handed to the program as
# a literal argument.
scmd=(python train.py)
# File that receives a copy of the job's stdout, then the timing result.
sout="time.txt"

# Run the command
start_time=$(date +%s.%N)
echo "Starting srun with command"
echo "srun $sargs ${scmd[*]} | tee $sout"
# sargs is a space-separated option string and is word-split on purpose.
# shellcheck disable=SC2086
srun $sargs "${scmd[@]}" | tee "$sout"
# tee's status would otherwise hide srun's; read srun's status directly.
srun_rc=${PIPESTATUS[0]}
end_time=$(date +%s.%N)
if (( srun_rc != 0 ))
then
    # Report the failure but keep going so the timing file is still written.
    printf 'warning: srun exited with status %d\n' "$srun_rc" >&2
fi

# Elapsed wall-clock seconds; bc is used because the timestamps are fractional.
runtime=$(bc -l <<<"$end_time - $start_time")
# Overwrites the tee'd job output with the single YVALUE line.
echo "YVALUE=$runtime" > "$sout"
printf "Pytorch test completed in %0.3f secs\n" "$runtime"

[SSH] executing...
[SSH] Exception:Auth fail for methods 'publickey'
com.jcraft.jsch.JSchException: Auth fail for methods 'publickey'
	at PluginClassLoader for jsch//com.jcraft.jsch.Session.connect(Session.java:521)
	at PluginClassLoader for ssh//org.jvnet.hudson.plugins.CredentialsSSHSite.createSession(CredentialsSSHSite.java:132)
	at PluginClassLoader for ssh//org.jvnet.hudson.plugins.CredentialsSSHSite.executeCommand(CredentialsSSHSite.java:208)
	at PluginClassLoader for ssh//org.jvnet.hudson.plugins.SSHBuilder.perform(SSHBuilder.java:104)
	at hudson.tasks.BuildStepMonitor$1.perform(BuildStepMonitor.java:20)
	at hudson.model.AbstractBuild$AbstractBuildExecution.perform(AbstractBuild.java:818)
	at hudson.model.Build$BuildExecution.build(Build.java:199)
	at hudson.model.Build$BuildExecution.doRun(Build.java:164)
	at hudson.model.AbstractBuild$AbstractBuildExecution.run(AbstractBuild.java:526)
	at hudson.model.Run.execute(Run.java:1894)
	at hudson.model.FreeStyleBuild.run(FreeStyleBuild.java:44)
	at hudson.model.ResourceController.execute(ResourceController.java:101)
	at hudson.model.Executor.run(Executor.java:446)
Build step 'Execute shell script on remote host using ssh' marked build as failure
Recording plot data
Saving plot series data from: /var/lib/jenkins/jobs/pytorch_train/workspace/time.txt
Sending e-mails to: [email protected]
Finished: FAILURE