#!/bin/bash
#SBATCH --nodes=1
#SBATCH --ntasks=2 
#SBATCH --ntasks-per-node=2
#SBATCH --time=00:15:00
#SBATCH --gres=gpu:4
#SBATCH --mem=10G
#SBATCH -c 1
# The following line containing "nvprofiling" is not a comment; it is needed to access the GPU counters
#nvprofiling
# ------------------------
# Kill, by name, any nv-hostengine process that is already running; this job
# starts its own instance further down.
# NOTE(review): presumably guards against an engine left over from an earlier
# job on the same node - confirm.
killall nv-hostengine
# Compiler environment
# ------------------------------
# ------------------------------
# Start from an empty module environment, then load the DCGM tools and the
# Open MPI build used to compile and run the benchmark.
module purge
module load dcgm/2.3.4-1
# NOTE(review): judging by its name this is a PGI, CUDA-aware, UCX-enabled
# Open MPI build - confirm against the site's module documentation.
module load openmpi/pgi/4.0.4-UCX-cuda 
# "li" is presumably the abbreviated form of "module list"; it records the
# loaded modules in the job output.
module li
# Results directory
# One sub-directory per job, named after the SLURM job ID. It receives the
# sources, a copy of this script, the build products and the run output.
# Abort when the job ID is missing: workdir would otherwise collapse to $PWD
# and the build below would run inside the submission directory.
: "${SLURM_JOBID:?SLURM_JOBID is not set; submit this script with sbatch}"
workdir="${PWD}/${SLURM_JOBID}"
mkdir -p -- "${workdir}" || {
  echo "error: cannot create ${workdir}" >&2
  exit 1
}
cp -- Makefile_ucx ping_pong_cuda_aware.cu "${workdir}/" || {
  echo "error: cannot copy the sources to ${workdir}" >&2
  exit 1
}
# Keep a copy of the job script next to the results. This is done before the
# cd so that a relative $0 still resolves; a failure here is not fatal.
cp -- "$0" "${workdir}/"
# Never build or run from the wrong directory.
cd -- "${workdir}" || {
  echo "error: cannot enter ${workdir}" >&2
  exit 1
}
# Rebuild from scratch. A failing "clean" is tolerated, but a failing build is
# not: there would be nothing valid to launch afterwards.
make -f Makefile_ucx clean
make -f Makefile_ucx || {
  echo "error: build failed" >&2
  exit 1
}
# Computers list
# Write the machine file that mpirun reads below. The SLURM variables are
# quoted because the node list can contain bracket ranges (e.g. node[1-4]),
# which an unquoted expansion would expose to pathname expansion.
# NOTE(review): ExpandNodeList is a site-provided tool; the meaning of -r/-p
# is not visible here - confirm against the site documentation.
ExpandNodeList -r -p "${SLURM_NTASKS_PER_NODE}" "${SLURM_NODELIST}" > machine_file
##---UCX
# Record the GPUs visible to the job in the job output.
echo "NVIDIA smi"
nvidia-smi -L
# NOTE(review): the marker below duplicates the one in the job header; it is
# kept verbatim in case the site tooling scans the script for it.
#nvprofiling
# Start a DCGM host engine for this job, with its PID file and log written to
# the current directory. The PID file is what the teardown step uses to stop
# it. A start-up failure is reported but does not stop the benchmark itself.
nv-hostengine --pid nvhostengine.pid --log-filename nv-hostengine.log \
  || echo "warning: nv-hostengine failed to start; GPU counters will not be recorded" >&2
# NOTE(review): presumably disables UCX's memory-type cache, as commonly
# advised for CUDA-aware MPI - confirm against the UCX documentation.
export UCX_MEMTYPE_CACHE=n
# Sample DCGM fields 1011 and 1012 in the background (at most 2000 samples)
# while the benchmark runs, writing them to a per-job counter file.
# NOTE(review): 1011/1012 are presumably the NVLink TX/RX byte counters -
# confirm against the field-ID table of the loaded DCGM version.
dcgmi dmon -e 1011,1012 -c 2000 > "compteur.${SLURM_JOB_ID}" &
# ------------------------------
# Execution of the program
# Launch ./pp_cuda_aware as 2 MPI ranks placed according to machine_file.
# NOTE(review): the binary is presumably built from ping_pong_cuda_aware.cu by
# Makefile_ucx; the rank count stays fixed at 2 on that assumption.
mpirun --machinefile machine_file -np 2 ./pp_cuda_aware
# ------------------------------
#
# Stop the DCGM host engine through the PID file written at start-up.
# The PID is validated first, so a missing or empty file no longer ends up as a
# bare "kill -9" with no argument. SIGTERM is tried first so the engine gets a
# chance to shut down on its own (assuming it handles the signal); SIGKILL is
# the fallback after a short grace period.
engine_pid_file=nvhostengine.pid
engine_pid=""
if [[ -s "${engine_pid_file}" ]]; then
  engine_pid=$(<"${engine_pid_file}")
fi
if [[ "${engine_pid}" =~ ^[0-9]+$ ]]; then
  kill -TERM "${engine_pid}" 2>/dev/null
  # Poll for up to ~5 seconds; "kill -0" only tests whether the PID exists.
  for _ in 1 2 3 4 5; do
    kill -0 "${engine_pid}" 2>/dev/null || break
    sleep 1
  done
  if kill -0 "${engine_pid}" 2>/dev/null; then
    kill -KILL "${engine_pid}"
  fi
else
  echo "warning: no valid PID in ${engine_pid_file}; nv-hostengine not stopped" >&2
fi
# Print the accounting summary for this job in the job output.
# NOTE(review): jobinfo is a site-provided command - confirm its output.
jobinfo "${SLURM_JOBID}"