From f73fef1473ed195536dce232cbc1e2521134cc3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20M=C3=BCller?= Date: Fri, 2 Jul 2021 00:47:32 +0200 Subject: [PATCH] cgroups inside singularity container --- roles/cobald/files/cgroup.conf.noautomount | 11 +++++++++++ roles/cobald/files/slurm-slurmd.def | 1 + roles/cobald/files/start-drone | 16 ++++++++++++++-- roles/cobald/tasks/singularity.yml | 1 + 4 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 roles/cobald/files/cgroup.conf.noautomount diff --git a/roles/cobald/files/cgroup.conf.noautomount b/roles/cobald/files/cgroup.conf.noautomount new file mode 100644 index 0000000..3020de3 --- /dev/null +++ b/roles/cobald/files/cgroup.conf.noautomount @@ -0,0 +1,11 @@ +### +# +# Slurm cgroup support configuration file +# +# See man slurm.conf and man cgroup.conf for further +# information on cgroup configuration parameters +#-- +CgroupAutomount=no + +ConstrainCores=no +ConstrainRAMSpace=no diff --git a/roles/cobald/files/slurm-slurmd.def b/roles/cobald/files/slurm-slurmd.def index 33cea00..d65372f 100644 --- a/roles/cobald/files/slurm-slurmd.def +++ b/roles/cobald/files/slurm-slurmd.def @@ -4,6 +4,7 @@ From: slurm:slurmd %files 31-slurmd-configless /etc/docker-init.d/31-slurm-configless /container/volumes/munge/munge.key /etc/munge/munge.key + cgroup.conf.noautomount /etc/slurm/cgroup.conf %post rm /etc/docker-init.d/30-slurmd diff --git a/roles/cobald/files/start-drone b/roles/cobald/files/start-drone index 21e81c8..e55282f 100644 --- a/roles/cobald/files/start-drone +++ b/roles/cobald/files/start-drone @@ -8,15 +8,27 @@ function handler_quit(){ echo "shutdown container" singularity instance stop slurm-drone scontrol update NodeName=${nodename} State=FUTURE + umount /inner-cgroup/freezer + umount /inner-cgroup exit 0 } -trap handler_quit EXIT # set -x +trap handler_quit EXIT + +echo "mounting cgroups" +mkdir /inner-cgroup +mount -t tmpfs none /inner-cgroup +mkdir /inner-cgroup/freezer/ +mount --bind /sys/fs/cgroup/freezer/slurm/ /inner-cgroup/freezer/ +mount -o remount,ro /inner-cgroup + echo "starting ${nodename}" -singularity instance start --writable-tmpfs /shared/slurmd.sif slurm-drone \ +singularity instance start \ + -B /inner-cgroup/:/sys/fs/cgroup/ \ + --writable-tmpfs /shared/slurmd.sif slurm-drone \ slurm-ctl ${nodename} if [ $? -eq 0 ] ; then echo "container started, sleeping $(( 60 * ${SLURM_Walltime}))" diff --git a/roles/cobald/tasks/singularity.yml b/roles/cobald/tasks/singularity.yml index dc26f23..093e081 100644 --- a/roles/cobald/tasks/singularity.yml +++ b/roles/cobald/tasks/singularity.yml @@ -23,6 +23,7 @@ loop: - slurm-slurmd.def - 31-slurmd-configless + - cgroup.conf.noautomount register: cobald_copy_sing_files - name: remove old container