Compare commits

..

3 Commits

Author SHA1 Message Date
f7dd3bcf02 run slurmctld as user
Notice: trying to also run slurmd on the execute nodes as a non-root user
makes no sense, because it breaks sbatch. Furthermore, there is another
change necessary to run MPI jobs (just tried MpiDefault=none). I don't
consider running slurmd as root a good idea, but there seems to be no
other choice at the moment.
2021-04-30 17:15:57 +02:00
f2cb9b2c6b fixed log (includes log output from tasks now) 2021-04-30 16:47:31 +02:00
38a5b89de9 minor fixes 2021-04-29 12:19:33 +02:00
8 changed files with 86 additions and 31 deletions

View File

@@ -51,4 +51,6 @@
tags: docker
- name: "setup slurm test environment in docker containers"
role: slurm
vars:
slurm_user: slurm # or root
tags: slurm

View File

@@ -1 +1,5 @@
container_privileged: False
slurm_user: slurm
slurm_log_path_ctld: /var/log/slurm/slurmctld.log
slurm_log_path_d: /var/log/slurm/slurmd.log
slurm_log_path_sched: /var/log/slurm/slurmsched.log

View File

@@ -1,8 +1,9 @@
#!/usr/bin/env bash
set -e
if [ -f "/etc/munge/munge.key" ] ; then
chown munge:munge /etc/munge/munge.key
chmod 600 /etc/munge/munge.key
fi
exec "$@"

View File

@@ -2,25 +2,42 @@ FROM docker.io/library/centos:7 as base
RUN yum install -y epel-release && \
yum install -y slurm && \
yum clean all
yum clean all && rm -rf /var/cache/yum
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
yum clean all
yum clean all && rm -rf /var/cache/yum
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && chmod 755 /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
chmod 755 /usr/local/sbin/entrypoint.sh
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
ARG slurmuser=slurm
ENV slurmuser=${slurmuser}
RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
slurm-setuser -u $slurmuser -g $slurmuser -y
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
FROM base as slurmd
RUN yum install -y slurm-slurmd && \
yum clean all
yum clean all && rm -rf /var/cache/yum
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
slurmd -D 2>/dev/null 1>/dev/null & \
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
FROM base as slurmctld
RUN yum install -y slurm-slurmctld && \
yum clean all
yum clean all && rm -rf /var/cache/yum
# CMD bash -c 'cat <({ condor_master -f & tail --retry --pid $! -f /var/log/condor/MasterLog & })'
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'

View File

@@ -1,20 +1,32 @@
FROM docker.io/library/centos:7 as base
RUN yum install -y epel-release && \
yum install -y slurm slurm-slurmctld && \
yum clean all
yum install -y slurm && \
yum clean all && rm -rf /var/cache/yum
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
yum clean all
RUN yum install -y slurm-slurmctld && \
yum clean all
yum clean all && rm -rf /var/cache/yum
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && chmod 755 /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
chmod 755 /usr/local/sbin/entrypoint.sh
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & slurmctld -D & })'
# ... & tail --retry --pid $! -f /var/log/condor/MasterLog & })'
ARG slurmuser=slurm
ENV slurmuser=${slurmuser}
RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
slurm-setuser -u $slurmuser -g $slurmuser -y
RUN yum install -y slurm-slurmctld && \
yum clean all && rm -rf /var/cache/yum
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'

View File

@@ -1,17 +1,32 @@
FROM docker.io/library/centos:7
RUN yum install -y epel-release && \
yum install -y slurm slurm-slurmd && \
yum clean all
yum install -y slurm && \
yum clean all && rm -rf /var/cache/yum
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
yum clean all
yum clean all && rm -rf /var/cache/yum
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && chmod 755 /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
chmod 755 /usr/local/sbin/entrypoint.sh
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
# CMD bash -c 'cat <({ condor_master -f & tail --retry --pid $! -f /var/log/condor/MasterLog & })'
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & slurmd -D & })'
ARG slurmuser=slurm
ENV slurmuser=${slurmuser}
RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
slurm-setuser -u $slurmuser -g $slurmuser -y
RUN yum install -y slurm-slurmd && \
yum clean all && rm -rf /var/cache/yum
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
slurmd -D 2>/dev/null 1>/dev/null & \
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'

View File

@@ -82,6 +82,8 @@
volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
networks:
- name: "slurm"
env:
slurmuser: "{{slurm_user}}"
image: "{{item.image}}"
state: started
detach: True

View File

@@ -32,6 +32,7 @@ CryptoType=crypto/munge
#MaxStepCount=40000
#MaxTasksPerNode=128
MpiDefault=pmix
# when running slurmd as user change to: MpiDefault=none
#MpiParams=ports=#-#
#PluginDir=
#PlugStackConfig=
@@ -57,8 +58,9 @@ SlurmctldPort=6817
SlurmdPidFile=/var/run/slurm/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurm/d
SlurmUser=root
#SlurmdUser=root
SlurmUser={{slurm_user}}
SlurmdUser=root
# SlurmdUser=slurm -> sbatch does not work
#SrunEpilog=
#SrunProlog=
StateSaveLocation=/var/spool/slurm/ctld
@@ -130,7 +132,7 @@ AccountingStorageType=accounting_storage/none
#AccountingStorageUser=
AccountingStoreJobComment=YES
ClusterName=cluster
#DebugFlags=
#DebugFlags=Steps,TraceJobs
#JobCompHost=
JobCompLoc=/tmp/jobcomp
#JobCompPass=
@@ -141,10 +143,10 @@ JobCompType=jobcomp/filetxt
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
SlurmctldDebug=verbose
#SlurmctldLogFile=
SlurmctldLogFile={{slurm_log_path_ctld}}
SlurmdDebug=verbose
#SlurmdLogFile=
#SlurmSchedLogFile=
SlurmdLogFile={{slurm_log_path_d}}
SlurmSchedLogFile={{slurm_log_path_sched}}
#SlurmSchedLogLevel=
#
#
@@ -163,4 +165,4 @@ SlurmdDebug=verbose
# COMPUTE NODES
NodeName=slurm-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN
NodeName=slurm-submit1 CPUs=1 State=UNKNOWN
PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] Default=YES MaxTime=INFINITE State=UP
PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] AllocNodes=slurm-submit1 Default=YES MaxTime=INFINITE State=UP