From f7dd3bcf02864fae165f9b11b218fe062ceadb12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20M=C3=BCller?= Date: Fri, 30 Apr 2021 17:15:57 +0200 Subject: [PATCH] run slurmctld as user Notice: also trying to run slurmd on execute nodes as user makes no sense because it breaks sbatch. Furthermore there is another change necessary to run mpi jobs (just tried MpiDefault=none). I don't consider running slurmd as root a good idea, but there seems to be no other choice at the moment. --- play.yml | 2 ++ roles/slurm/defaults/main.yml | 1 + roles/slurm/files/slurm.Dockerfile | 18 ++++++++++++++++-- roles/slurm/files/slurmctld.Dockerfile | 8 +++++++- roles/slurm/files/slurmd.Dockerfile | 6 ++++++ roles/slurm/tasks/main.yml | 2 ++ roles/slurm/templates/slurm.conf.j2 | 11 +++++------ 7 files changed, 39 insertions(+), 9 deletions(-) diff --git a/play.yml b/play.yml index d2110ba..386b1f1 100644 --- a/play.yml +++ b/play.yml @@ -51,4 +51,6 @@ tags: docker - name: "setup slurm test environment in docker containers" role: slurm + vars: + slurm_user: slurm # or root tags: slurm diff --git a/roles/slurm/defaults/main.yml b/roles/slurm/defaults/main.yml index 4c812ba..56e8641 100644 --- a/roles/slurm/defaults/main.yml +++ b/roles/slurm/defaults/main.yml @@ -1,4 +1,5 @@ container_privileged: False +slurm_user: slurm slurm_log_path_ctld: /var/log/slurm/slurmctld.log slurm_log_path_d: /var/log/slurm/slurmd.log slurm_log_path_sched: /var/log/slurm/slurmsched.log diff --git a/roles/slurm/files/slurm.Dockerfile b/roles/slurm/files/slurm.Dockerfile index 1846b73..36799a9 100644 --- a/roles/slurm/files/slurm.Dockerfile +++ b/roles/slurm/files/slurm.Dockerfile @@ -14,16 +14,30 @@ RUN chown root:root /usr/local/sbin/entrypoint.sh && \ ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ] +ARG slurmuser=slurm +ENV slurmuser=${slurmuser} + +RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\ + slurm-setuser -u $slurmuser -g $slurmuser -y + +ENV 
SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log" +ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log" +ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log" + FROM base as slurmd RUN yum install -y slurm-slurmd && \ yum clean all && rm -rf /var/cache/yum -CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & slurmd -D & })' +CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \ + slurmd -D 2>/dev/null 1>/dev/null & \ + tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })' FROM base as slurmctld RUN yum install -y slurm-slurmctld && \ yum clean all && rm -rf /var/cache/yum -CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & slurmctld -D & })' +CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \ + su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \ + tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })' diff --git a/roles/slurm/files/slurmctld.Dockerfile b/roles/slurm/files/slurmctld.Dockerfile index 1deb7f0..efe2591 100644 --- a/roles/slurm/files/slurmctld.Dockerfile +++ b/roles/slurm/files/slurmctld.Dockerfile @@ -14,6 +14,12 @@ RUN chown root:root /usr/local/sbin/entrypoint.sh && \ ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ] +ARG slurmuser=slurm +ENV slurmuser=${slurmuser} + +RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\ + slurm-setuser -u $slurmuser -g $slurmuser -y + RUN yum install -y slurm-slurmctld && \ yum clean all && rm -rf /var/cache/yum @@ -22,5 +28,5 @@ ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log" ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log" CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \ - slurmctld -D 2>/dev/null 1>/dev/null & \ + su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \ tail --retry --pid $! 
-f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })' diff --git a/roles/slurm/files/slurmd.Dockerfile b/roles/slurm/files/slurmd.Dockerfile index 2549727..e20c91b 100644 --- a/roles/slurm/files/slurmd.Dockerfile +++ b/roles/slurm/files/slurmd.Dockerfile @@ -14,6 +14,12 @@ RUN chown root:root /usr/local/sbin/entrypoint.sh && \ ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ] +ARG slurmuser=slurm +ENV slurmuser=${slurmuser} + +RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\ + slurm-setuser -u $slurmuser -g $slurmuser -y + RUN yum install -y slurm-slurmd && \ yum clean all && rm -rf /var/cache/yum diff --git a/roles/slurm/tasks/main.yml b/roles/slurm/tasks/main.yml index 8ee8ff0..563942d 100644 --- a/roles/slurm/tasks/main.yml +++ b/roles/slurm/tasks/main.yml @@ -82,6 +82,8 @@ volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}" networks: - name: "slurm" + env: + slurmuser: "{{slurm_user}}" image: "{{item.image}}" state: started detach: True diff --git a/roles/slurm/templates/slurm.conf.j2 b/roles/slurm/templates/slurm.conf.j2 index cc7e0fe..3b11bc9 100644 --- a/roles/slurm/templates/slurm.conf.j2 +++ b/roles/slurm/templates/slurm.conf.j2 @@ -32,6 +32,7 @@ CryptoType=crypto/munge #MaxStepCount=40000 #MaxTasksPerNode=128 MpiDefault=pmix +# when running slurmd as user change to: MpiDefault=none #MpiParams=ports=#-# #PluginDir= #PlugStackConfig= @@ -57,8 +58,9 @@ SlurmctldPort=6817 SlurmdPidFile=/var/run/slurm/slurmd.pid SlurmdPort=6818 SlurmdSpoolDir=/var/spool/slurm/d -SlurmUser=root -#SlurmdUser=root +SlurmUser={{slurm_user}} +SlurmdUser=root +# SlurmdUser=slurm -> sbatch does not work #SrunEpilog= #SrunProlog= StateSaveLocation=/var/spool/slurm/ctld @@ -130,7 +132,7 @@ AccountingStorageType=accounting_storage/none #AccountingStorageUser= AccountingStoreJobComment=YES ClusterName=cluster -#DebugFlags= +#DebugFlags=Steps,TraceJobs #JobCompHost= JobCompLoc=/tmp/jobcomp #JobCompPass= @@ -141,12 +143,9 @@ JobCompType=jobcomp/filetxt 
JobAcctGatherFrequency=30 JobAcctGatherType=jobacct_gather/none SlurmctldDebug=verbose -#SlurmctldLogFile= SlurmctldLogFile={{slurm_log_path_ctld}} SlurmdDebug=verbose -#SlurmdLogFile= SlurmdLogFile={{slurm_log_path_d}} -#SlurmSchedLogFile= SlurmSchedLogFile={{slurm_log_path_sched}} #SlurmSchedLogLevel= #