Skip to content
Failed

Console Output

Started by user Jeremy Enos
Running as SYSTEM
Building in workspace /var/lib/jenkins/jobs/pytorch_infer/workspace
[SSH] script:
TARGETNODE=""hydro01""

module load anaconda3_gpu/4.13.0
module load cuda/11.7.0

cd pytorch_infer
rm -f infer_results_jenkins.csv

# Slurm Arguments
sargs="--nodes=1 "
sargs+="--ntasks-per-node=1 "
sargs+="--mem=16g "
sargs+="--time=00:10:00 "
sargs+="--account=bbmb-hydro "
sargs+="--gpus-per-node=1 "
sargs+="--gpu-bind=closest "
# Add Target node if it exists
if [[ ! -z ${TARGETNODE} ]]
then
    PARTITION=`sinfo --format="%R,%N" -n hydro61  | grep hydro61  | cut -d',' -f1 | tail -1`
    sargs+="--partition=${PARTITION} "
    sargs+="--nodelist=${TARGETNODE} "
else
    sargs+="--partition=a100 "
fi
# Executable to run
scmd="python benchmark.py --model-list jenkins_list_short.txt --bench inference --channels-last --results-file infer_results_jenkins.csv"

# Run the command
start_time=`date +%s.%N`
echo $"Starting srun with command"
echo "srun $sargs $scmd"
srun $sargs $scmd
end_time=`date +%s.%N`

python transpose_results.py

runtime=$( echo "$end_time - $start_time" | bc -l )
echo "YVALUE=$runtime" > time.txt
printf "Pytorch test completed in %0.3f secs\n" $runtime

[SSH] executing...
Starting srun with command
srun --nodes=1 --ntasks-per-node=1 --mem=16g --time=00:10:00 --account=bbmb-hydro --gpus-per-node=1 --gpu-bind=closest --partition=all --nodelist=hydro01  python benchmark.py --model-list jenkins_list_short.txt --bench inference --channels-last --results-file infer_results_jenkins.csv
srun: error: GPU resources cannot be requested on a CPU-only partition.
srun: error: If GPUs are needed, please specify an a100 partition.
srun: error: Unable to allocate resources: Access/permission denied
Traceback (most recent call last):
  File "/u/svchydrojenkins/pytorch_infer/transpose_results.py", line 2, in <module>
    df = pd.read_csv('infer_results_jenkins.csv')
  File "/sw/external/python/anaconda3_gpu/lib/python3.9/site-packages/pandas/util/_decorators.py", line 311, in wrapper
    return func(*args, **kwargs)
  File "/sw/external/python/anaconda3_gpu/lib/python3.9/site-packages/pandas/io/parsers/readers.py", line 680, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/sw/external/python/anaconda3_gpu/lib/python3.9/site-packages/pandas/io/parsers/readers.py", line 575, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/sw/external/python/anaconda3_gpu/lib/python3.9/site-packages/pandas/io/parsers/readers.py", line 933, in __init__
    self._engine = self._make_engine(f, self.engine)
  File "/sw/external/python/anaconda3_gpu/lib/python3.9/site-packages/pandas/io/parsers/readers.py", line 1217, in _make_engine
    self.handles = get_handle(  # type: ignore[call-overload]
  File "/sw/external/python/anaconda3_gpu/lib/python3.9/site-packages/pandas/io/common.py", line 789, in get_handle
    handle = open(
FileNotFoundError: [Errno 2] No such file or directory: 'infer_results_jenkins.csv'
Pytorch test completed in 0.018 secs

[SSH] completed
[SSH] exit-status: 0

[workspace] $ /bin/sh -xe /tmp/jenkins10754840484248311096.sh
+ scp 'HYDRO_REMOTE:~svchydrojenkins/pytorch_infer/time.txt' /var/lib/jenkins/jobs/pytorch_infer/workspace
+ scp 'HYDRO_REMOTE:~svchydrojenkins/pytorch_infer/infer_results_jenkins.csv' /var/lib/jenkins/jobs/pytorch_infer/workspace
scp: /u/svchydrojenkins/pytorch_infer/infer_results_jenkins.csv: No such file or directory
Build step 'Execute shell' marked build as failure
Recording plot data
Saving plot series data from: /var/lib/jenkins/jobs/pytorch_infer/workspace/time.txt
Sending e-mails to: [email protected]
Finished: FAILURE