new startup for cobald containers

This commit is contained in:
2021-06-17 14:55:21 +02:00
parent cc43a39ea3
commit 6eb6984d6a
10 changed files with 102 additions and 33 deletions

View File

@@ -19,6 +19,7 @@ all:
slurm:
hosts:
ed-c7-1:
ed-c7-2:
cobald:
hosts:
ed-c7-1:

View File

@@ -53,10 +53,10 @@
- htcondor-containered
- htcondor
- hosts: slurm
- hosts: slurm, cobald
vars:
container_privileged: True
num_nodes: 3
slurm_num_nodes: 10
roles:
- name: "setup docker"
role: docker
@@ -65,15 +65,12 @@
role: slurm
vars:
slurm_user: slurm # or root
num_nodes: "{{slurm_num_nodes}}"
when: '"slurm" in group_names'
tags: slurm
- hosts: cobald
roles:
- name: "setup docker"
role: docker
tags: docker
- name: "install cobald"
role: cobald
vars:
docker_network: slurm
when: '"cobald" in group_names'
tags: cobald

View File

@@ -0,0 +1,18 @@
#!/bin/bash
# docker-init: minimal init process for the container.
#
# Launches every regular file found in /etc/docker-init.d/ as a background
# job (in glob order) and blocks until all of them have exited. On SIGINT or
# SIGTERM each launched script and its direct children are sent SIGTERM.
# Exits with the status that `wait` reports for the launched scripts.
#
# TODO: call start scripts like "foo.sh start" and "foo.sh stop" to avoid pkill

# PIDs of the launched start scripts.
pids=()

# Signal handler: terminate the direct children of every launched script
# first, then the script itself.
function trp_term(){
    local pid
    echo pkill -P "${pids[@]}"
    for pid in "${pids[@]}" ; do
        pkill -P "$pid"
        kill -SIGTERM "$pid"
    done
}

# Succeeds while at least one launched script still exists as a process.
function any_alive(){
    local pid
    for pid in "${pids[@]}" ; do
        kill -0 "$pid" 2>/dev/null && return 0
    done
    return 1
}

trap trp_term SIGINT SIGTERM

for script in /etc/docker-init.d/* ; do
    # Skip anything that is not a regular file instead of aborting the whole
    # loop; this also covers the literal pattern left by an unmatched glob.
    [ -f "$script" ] || continue
    "$script" &
    pids+=("$!")
done

wait "${pids[@]}"
status=$?
# A trapped signal makes `wait` return early with a status above 128 while
# the children may still be shutting down. Keep waiting until they are gone
# so this init process does not exit (and take the container down) first.
while [ "$status" -gt 128 ] && any_alive ; do
    wait "${pids[@]}"
    status=$?
done
exit "$status"

View File

@@ -1,4 +1,4 @@
FROM docker.io/library/centos:7 as base
FROM docker.io/library/centos:7
RUN yum install -y epel-release && \
yum install -y slurm && \
@@ -7,6 +7,7 @@ RUN yum install -y epel-release && \
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
yum clean all && rm -rf /var/cache/yum
# FIXME
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
@@ -23,10 +24,15 @@ RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
RUN yum install -y slurm-slurmctld && \
yum clean all && rm -rf /var/cache/yum
COPY docker-init /usr/local/sbin/docker-init
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
# FIXME
COPY start-scripts/10-munge /etc/docker-init.d/
COPY start-scripts/20-slurmctld /etc/docker-init.d/
RUN chmod 755 /etc/docker-init.d/*
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
CMD /usr/local/sbin/docker-init

View File

@@ -7,6 +7,7 @@ RUN yum install -y epel-release && \
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
yum clean all && rm -rf /var/cache/yum
# FIXME
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
@@ -23,10 +24,15 @@ RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
RUN yum install -y slurm-slurmd && \
yum clean all && rm -rf /var/cache/yum
COPY docker-init /usr/local/sbin/docker-init
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
# FIXME
COPY start-scripts/10-munge /etc/docker-init.d/
COPY start-scripts/30-slurmd /etc/docker-init.d/
RUN chmod 755 /etc/docker-init.d/*
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
slurmd -D 2>/dev/null 1>/dev/null & \
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
CMD /usr/local/sbin/docker-init

View File

@@ -0,0 +1,2 @@
#!/bin/sh
# Start script for munged: replace this shell with `su`, which runs
# "munged -F" through /bin/sh as the user "munge".
exec su --shell=/bin/sh --command="munged -F" munge

View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Start script for slurmctld.
# Run "slurmctld -D" via su as ${slurmuser} in the background and discard
# its stdout and stderr.
su -s /bin/sh -c "slurmctld -D" ${slurmuser} >/dev/null 2>&1 &
daemon_pid=$!
# Follow the controller and scheduler log files (retrying until they can be
# opened); tail terminates once the background su process has exited.
tail --retry --pid ${daemon_pid} -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}

View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Start script for slurmd.
# Run "slurmd -D" in the background and discard its stdout and stderr.
slurmd -D >/dev/null 2>&1 &
daemon_pid=$!
# Follow the slurmd and scheduler log files (retrying until they can be
# opened); tail terminates once the background slurmd process has exited.
tail --retry --pid ${daemon_pid} -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}

View File

@@ -1,31 +1,50 @@
- file:
path: "/container/docker-images/{{item}}"
path: "/container/docker-images/slurm"
state: directory
owner: "{{unpriv_user}}"
group: docker
- copy:
src: "{{item}}.Dockerfile"
dest: "/container/docker-images/{{item}}/Dockerfile"
- copy: # FIXME: template
src: "{{image.name}}.Dockerfile"
dest: "/container/docker-images/slurm/{{image.name}}.Dockerfile"
owner: "{{unpriv_user}}"
group: docker
register: slurm_cp_dockerfile
- copy:
src: "entrypoint.sh"
dest: "/container/docker-images/{{item}}/entrypoint.sh"
- name: copy entrypoint and docker-init
copy: # FIXME: swap out
src: "{{item}}"
dest: "/container/docker-images/slurm/{{item}}"
owner: root
group: root
mode: u=rwx,g=rx,o=rx
loop:
- entrypoint.sh
- docker-init
register: slurm_cp_entrypt
- name: copy startup scripts
copy:
src: "start-scripts/"
dest: "/container/docker-images/slurm/start-scripts/"
owner: root
group: root
mode: u=rwx,g=rx,o=rx
register: slurm_cp_stscrs
- set_fact:
slurm_image_files_changed: "{{ (slurm_image_files_changed | default(False))
or slurm_cp_entrypt.changed or slurm_cp_stscrs.changed }}"
- docker_image:
name: "slurm-{{item}}"
name: "slurm"
tag: "{{image.name}}"
# pull: False
build:
pull: False
path: "/container/docker-images/{{item}}"
# target: "{{item}}" # unsupported on old docker-py versions as in el7
path: "/container/docker-images/slurm/"
dockerfile: "{{image.name}}.Dockerfile"
# target: "{{image.name}}" # unsupported on old docker-py version as in el7
source: build
force_source: "{{slurm_cp_dockerfile.changed or slurm_cp_entrypt.changed}}"
force_source: "{{slurm_cp_dockerfile.changed or slurm_image_files_changed}}"

View File

@@ -5,8 +5,17 @@
- include_tasks: dockerimage.yml
loop:
- slurmctld
- slurmd
- name: slurmctld
start:
- 10-munge
- 20-slurmctld
- name: slurmd
start:
- 10-munge
- 30-slurmd
loop_control:
loop_var: image
label: "{{ image.name }}"
- name: generate munge key
shell:
@@ -63,9 +72,10 @@
- set_fact:
slurm_nodes: # default nodes: controller and submit machine
- machine: ctl
image: slurm-slurmctld
image: slurm:slurmctld
exposed_ports: [ "6817:6817/tcp" ]
- machine: submit1
image: slurm-slurmd
image: slurm:slurmd
extra_mounts:
- "/home/{{unpriv_user}}/job3/:/mnt/:rw"
tags: [ slurm-config ]
@@ -80,6 +90,7 @@
hostname: "slurm-{{item.machine}}"
domainname: "slurm.local"
volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
ports: "{{ item.exposed_ports | default([]) }}"
networks:
- name: "slurm"
env:
@@ -90,15 +101,16 @@
cleanup: True
privileged: "{{ container_privileged | bool }}"
networks_cli_compatible: True
interactive: True
vars:
default_mounts:
- /container/volumes/slurm/:/etc/slurm/:rw
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
- slurm-shared:/shared/:rw
slurm_nodes_all: | # add execute nodes
{% for i in range(1, 4) -%}
{% for i in range(1, num_nodes+1) -%}
{% set _ = slurm_nodes.extend([
{'machine':'exec%s'|format(i), 'image': 'slurm-slurmd'}]) -%}
{'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
{%- endfor %}
{{ slurm_nodes }}
loop: "{{slurm_nodes_all}}"