slurm container running when drone started

This commit is contained in:
2021-07-01 15:19:35 +02:00
parent d88761ca7d
commit 8bc2f717e0
8 changed files with 114 additions and 4 deletions

View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Publish the slurmd Singularity image to the shared directory when the
# shared copy is missing or older than the image shipped with this container.

# sync_image SRC DST
#   Copy SRC over DST if DST does not exist or SRC is newer than DST.
#   Returns 0 when DST is already up to date or the copy succeeded,
#   non-zero when SRC is missing or the copy failed.
sync_image() {
  if [ ! -f "$1" ]; then
    echo "sync_image: source image '$1' not found" >&2
    return 1
  fi
  # The result of `-nt` with a missing right-hand file differs between
  # shells (bash: true, dash: false), so test for absence explicitly.
  if [ ! -e "$2" ] || [ "$1" -nt "$2" ]; then
    cp -- "$1" "$2"
  fi
}

sync_image /slurm-singimage/slurmd.sif /shared/slurmd.sif

View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Start slurmd against a configuration server and keep this script in the
# foreground by following the Slurm log files until slurmd exits.
#
# Environment read:
#   slurmctld             value passed to `slurmd --conf-server` (required)
#   nodename              value passed to `slurmd -N`            (required)
#   SLURMD_LOG_PATH       log file to follow (skipped when empty)
#   SLURM_SCHED_LOG_PATH  log file to follow (skipped when empty)

# Fail fast: with an empty ${slurmctld} the following option (-D) would be
# consumed as the argument of --conf-server.
: "${slurmctld:?slurmctld must be set}"
: "${nodename:?nodename must be set}"

# stdout/stderr of slurmd are discarded on purpose; its output is presumably
# available through the log files followed below.
slurmd --conf-server "${slurmctld}" -D -N "${nodename}" >/dev/null 2>&1 &

# --pid makes tail terminate once slurmd is gone; --retry keeps trying files
# that cannot be opened yet. ${var:+"..."} drops an empty path instead of
# handing tail an empty file name.
tail --retry --pid "$!" -f \
  ${SLURMD_LOG_PATH:+"${SLURMD_LOG_PATH}"} \
  ${SLURM_SCHED_LOG_PATH:+"${SLURM_SCHED_LOG_PATH}"}

View File

@@ -0,0 +1,30 @@
# Singularity definition for the slurmd image used by the drones.
#
# The image is built from the local Docker daemon image slurm:slurmd. The
# stock 30-slurmd init script is removed and 31-slurm-configless is installed
# in its place; that script reads the exported `slurmctld` and `nodename`.
#
# Arguments of both the instance start script and the run script:
#   $1  slurmctld host, exported as `slurmctld`
#   $2  node name, exported as `nodename`
#
# NOTE(review): the munge key is copied into the image, so anyone who can
# read the resulting .sif can read the key - confirm this is acceptable.
Bootstrap: docker-daemon
From: slurm:slurmd
%files
    31-slurmd-configless /etc/docker-init.d/31-slurm-configless
    /container/volumes/munge/munge.key /etc/munge/munge.key
%post
    # Replace the stock slurmd init script with the configless variant.
    rm /etc/docker-init.d/30-slurmd
    chmod 755 /etc/docker-init.d/31-slurm-configless
%startscript
    # Used by `singularity instance start`; kept identical to %runscript.
    if [ -z "${1}" ] || [ -z "${2}" ] ; then
        echo "undefined variables slurmctld or nodename" >&2
        exit 1
    fi
    export slurmctld="${1}"
    export nodename="${2}"
    # Log the effective settings before handing over to the init system.
    echo "${slurmctld} ${nodename} ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}"
    exec /usr/local/sbin/entrypoint.sh /usr/local/sbin/docker-init
%runscript
    # Used by `singularity run`; kept identical to %startscript.
    if [ -z "${1}" ] || [ -z "${2}" ] ; then
        echo "undefined variables slurmctld or nodename" >&2
        exit 1
    fi
    export slurmctld="${1}"
    export nodename="${2}"
    # Log the effective settings before handing over to the init system.
    echo "${slurmctld} ${nodename} ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}"
    exec /usr/local/sbin/entrypoint.sh /usr/local/sbin/docker-init

View File

@@ -0,0 +1,25 @@
#!/bin/sh
#SBATCH -D /shared
#
# Slurm batch job that runs a drone: it starts the slurmd Singularity instance
# "slurm-drone" from /shared/slurmd.sif, keeps it alive for SLURM_Walltime
# minutes, then stops the instance and sets the drone node to State=FUTURE.
#
# Environment read:
#   SLURM_Walltime  run time in minutes (whole number); presumably supplied
#                   by whoever submits this job - it is not set in this file.
#
# The job always finishes with exit status 0 (see handler_quit).

# Dump the environment and the job arguments into the job output.
export
echo "$@"

# "drone" followed by the host name from its first digit onwards
# (e.g. node12 -> drone12). Without any digit awk's match() returns 0 and
# the whole host name is appended.
nodename=$(hostname | awk '{ print "drone" substr($1,match($1, "([[:digit:]]+)")) }')

# Cleanup, run exactly once through the EXIT trap: stop the container
# instance and hand the node back to Slurm as FUTURE.
handler_quit() {
  # Disarm the trap first so the `exit` below cannot run the cleanup again.
  trap - EXIT
  echo "shutdown container"
  singularity instance stop slurm-drone
  scontrol update NodeName="${nodename}" State=FUTURE
  exit 0
}
trap handler_quit EXIT
# set -x
echo "starting ${nodename}"
case "${SLURM_Walltime}" in
  '' | *[!0-9]*)
    # Refuse to start rather than start the container and tear it down
    # immediately because the sleep duration cannot be computed.
    echo "SLURM_Walltime must be a whole number of minutes, got '${SLURM_Walltime}'" >&2
    ;;
  *)
    # Arguments after the instance name are passed on to the image's start
    # script: slurmctld host and node name.
    if singularity instance start --writable-tmpfs /shared/slurmd.sif slurm-drone \
      slurm-ctl "${nodename}"; then
      echo "container started, sleeping $(( 60 * SLURM_Walltime ))"
      sleep "$(( 60 * SLURM_Walltime ))"
    else
      echo "failed to start singularity instance slurm-drone" >&2
    fi
    ;;
esac
# Falling off the end triggers the EXIT trap, which performs the cleanup.