new startup for cobald containers

This commit is contained in:
2021-06-17 14:55:21 +02:00
parent cc43a39ea3
commit 6eb6984d6a
10 changed files with 102 additions and 33 deletions

View File

@@ -19,6 +19,7 @@ all:
slurm: slurm:
hosts: hosts:
ed-c7-1: ed-c7-1:
ed-c7-2:
cobald: cobald:
hosts: hosts:
ed-c7-1: ed-c7-1:

View File

@@ -53,10 +53,10 @@
- htcondor-containered - htcondor-containered
- htcondor - htcondor
- hosts: slurm - hosts: slurm, cobald
vars: vars:
container_privileged: True container_privileged: True
num_nodes: 3 slurm_num_nodes: 10
roles: roles:
- name: "setup docker" - name: "setup docker"
role: docker role: docker
@@ -65,15 +65,12 @@
role: slurm role: slurm
vars: vars:
slurm_user: slurm # or root slurm_user: slurm # or root
num_nodes: "{{slurm_num_nodes}}"
when: '"slurm" in group_names'
tags: slurm tags: slurm
- hosts: cobald
roles:
- name: "setup docker"
role: docker
tags: docker
- name: "install cobald" - name: "install cobald"
role: cobald role: cobald
vars: vars:
docker_network: slurm docker_network: slurm
when: '"cobald" in group_names'
tags: cobald tags: cobald

View File

@@ -0,0 +1,18 @@
#!/bin/bash
# Container init: launches every start script found in /etc/docker-init.d in
# the background, forwards SIGINT/SIGTERM to them and stays alive until all of
# them have exited.

pids=""

# Signal handler: send SIGTERM to the direct children of every start script
# (pkill -P) and then to the start script itself.
function trp_term(){
    local pid
    echo "docker-init: terminating start scripts:$pids" >&2
    for pid in $pids ; do
        pkill -P "$pid"
        kill -SIGTERM "$pid"
    done
}
trap trp_term SIGINT SIGTERM

for i in /etc/docker-init.d/* ; do
    # Skip entries that are not regular files (this also covers the unexpanded
    # glob when the directory is empty) instead of aborting the whole loop, so
    # a stray subdirectory cannot prevent later scripts from starting.
    [ -f "$i" ] || continue
    "$i" &
    pids="$pids $!"
done

# `wait` returns early when a trapped signal is delivered. Keep waiting until
# none of the start scripts is left, so this script does not exit while the
# services are still shutting down.
while : ; do
    wait $pids
    running=""
    for pid in $pids ; do
        kill -0 "$pid" 2>/dev/null && running="yes"
    done
    [ -z "$running" ] && break
done
# TODO: call start scripts like "foo.sh start" and "foo.sh stop" to avoid pkill

View File

@@ -1,4 +1,4 @@
FROM docker.io/library/centos:7 as base FROM docker.io/library/centos:7
RUN yum install -y epel-release && \ RUN yum install -y epel-release && \
yum install -y slurm && \ yum install -y slurm && \
@@ -7,6 +7,7 @@ RUN yum install -y epel-release && \
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \ RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
yum clean all && rm -rf /var/cache/yum yum clean all && rm -rf /var/cache/yum
# FIXME
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && \ RUN chown root:root /usr/local/sbin/entrypoint.sh && \
@@ -23,10 +24,15 @@ RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
RUN yum install -y slurm-slurmctld && \ RUN yum install -y slurm-slurmctld && \
yum clean all && rm -rf /var/cache/yum yum clean all && rm -rf /var/cache/yum
COPY docker-init /usr/local/sbin/docker-init
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
# FIXME
COPY start-scripts/10-munge /etc/docker-init.d/
COPY start-scripts/20-slurmctld /etc/docker-init.d/
RUN chmod 755 /etc/docker-init.d/*
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log" ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log" ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log" ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \ CMD /usr/local/sbin/docker-init
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'

View File

@@ -7,6 +7,7 @@ RUN yum install -y epel-release && \
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \ RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
yum clean all && rm -rf /var/cache/yum yum clean all && rm -rf /var/cache/yum
# FIXME
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
RUN chown root:root /usr/local/sbin/entrypoint.sh && \ RUN chown root:root /usr/local/sbin/entrypoint.sh && \
@@ -23,10 +24,15 @@ RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
RUN yum install -y slurm-slurmd && \ RUN yum install -y slurm-slurmd && \
yum clean all && rm -rf /var/cache/yum yum clean all && rm -rf /var/cache/yum
COPY docker-init /usr/local/sbin/docker-init
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
# FIXME
COPY start-scripts/10-munge /etc/docker-init.d/
COPY start-scripts/30-slurmd /etc/docker-init.d/
RUN chmod 755 /etc/docker-init.d/*
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log" ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log" ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log" ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \ CMD /usr/local/sbin/docker-init
slurmd -D 2>/dev/null 1>/dev/null & \
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'

View File

@@ -0,0 +1,2 @@
#!/bin/sh
# Start script for munge: replace this shell with `su`, which runs
# `munged -F` through /bin/sh as the `munge` user.
exec su --shell /bin/sh --command "munged -F" munge

View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Start script for slurmctld: run `slurmctld -D` in the background as
# ${slurmuser} with its stdout/stderr discarded, then follow the controller
# and scheduler log files until that background process exits.
su -s /bin/sh -c "slurmctld -D" ${slurmuser} >/dev/null 2>&1 &
ctld_pid=$!
# --pid makes tail stop once the daemon is gone; --retry keeps trying while
# the log files do not exist yet.
tail --retry --pid "${ctld_pid}" -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}

View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Start script for slurmd: run `slurmd -D` in the background with its
# stdout/stderr discarded, then follow the slurmd and scheduler log files
# until that background process exits.
slurmd -D >/dev/null 2>&1 &
slurmd_pid=$!
# --pid makes tail stop once the daemon is gone; --retry keeps trying while
# the log files do not exist yet.
tail --retry --pid "${slurmd_pid}" -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}

View File

@@ -1,31 +1,50 @@
- file: - file:
path: "/container/docker-images/{{item}}" path: "/container/docker-images/slurm"
state: directory state: directory
owner: "{{unpriv_user}}" owner: "{{unpriv_user}}"
group: docker group: docker
- copy: - copy: # FIXME: template
src: "{{item}}.Dockerfile" src: "{{image.name}}.Dockerfile"
dest: "/container/docker-images/{{item}}/Dockerfile" dest: "/container/docker-images/slurm/{{image.name}}.Dockerfile"
owner: "{{unpriv_user}}" owner: "{{unpriv_user}}"
group: docker group: docker
register: slurm_cp_dockerfile register: slurm_cp_dockerfile
- copy: - name: copy entrypoint and docker-init
src: "entrypoint.sh" copy: # FIXME: swap out
dest: "/container/docker-images/{{item}}/entrypoint.sh" src: "{{item}}"
dest: "/container/docker-images/slurm/{{item}}"
owner: root owner: root
group: root group: root
mode: u=rwx,g=rx,o=rx mode: u=rwx,g=rx,o=rx
loop:
- entrypoint.sh
- docker-init
register: slurm_cp_entrypt register: slurm_cp_entrypt
- name: copy startup scripts
copy:
src: "start-scripts/"
dest: "/container/docker-images/slurm/start-scripts/"
owner: root
group: root
mode: u=rwx,g=rx,o=rx
register: slurm_cp_stscrs
- set_fact:
slurm_image_files_changed: "{{ (slurm_image_files_changed | default(False))
or slurm_cp_entrypt.changed or slurm_cp_stscrs.changed }}"
- docker_image: - docker_image:
name: "slurm-{{item}}" name: "slurm"
tag: "{{image.name}}"
# pull: False # pull: False
build: build:
pull: False pull: False
path: "/container/docker-images/{{item}}" path: "/container/docker-images/slurm/"
# target: "{{item}}" # unsupported on old docker-py versions as in el7 dockerfile: "{{image.name}}.Dockerfile"
# target: "{{image.name}}" # unsupported on old docker-py version as in el7
source: build source: build
force_source: "{{slurm_cp_dockerfile.changed or slurm_cp_entrypt.changed}}" force_source: "{{slurm_cp_dockerfile.changed or slurm_image_files_changed}}"

View File

@@ -5,8 +5,17 @@
- include_tasks: dockerimage.yml - include_tasks: dockerimage.yml
loop: loop:
- slurmctld - name: slurmctld
- slurmd start:
- 10-munge
- 20-slurmctld
- name: slurmd
start:
- 10-munge
- 30-slurmd
loop_control:
loop_var: image
label: "{{ image.name }}"
- name: generate munge key - name: generate munge key
shell: shell:
@@ -63,13 +72,14 @@
- set_fact: - set_fact:
slurm_nodes: # default nodes: controller and submit machine slurm_nodes: # default nodes: controller and submit machine
- machine: ctl - machine: ctl
image: slurm-slurmctld image: slurm:slurmctld
exposed_ports: [ "6817:6817/tcp" ]
- machine: submit1 - machine: submit1
image: slurm-slurmd image: slurm:slurmd
extra_mounts: extra_mounts:
- "/home/{{unpriv_user}}/job3/:/mnt/:rw" - "/home/{{unpriv_user}}/job3/:/mnt/:rw"
tags: [ slurm-config ] tags: [ slurm-config ]
# TODO: reserve some address using docker_network_info and assign as aux # TODO: reserve some address using docker_network_info and assign as aux
# address to enable slurmctld to get a static address in order to be # address to enable slurmctld to get a static address in order to be
# reachable from slurm running on docker host to enable submitting jobs. # reachable from slurm running on docker host to enable submitting jobs.
@@ -80,6 +90,7 @@
hostname: "slurm-{{item.machine}}" hostname: "slurm-{{item.machine}}"
domainname: "slurm.local" domainname: "slurm.local"
volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}" volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
ports: "{{ item.exposed_ports | default([]) }}"
networks: networks:
- name: "slurm" - name: "slurm"
env: env:
@@ -90,15 +101,16 @@
cleanup: True cleanup: True
privileged: "{{ container_privileged | bool }}" privileged: "{{ container_privileged | bool }}"
networks_cli_compatible: True networks_cli_compatible: True
interactive: True
vars: vars:
default_mounts: default_mounts:
- /container/volumes/slurm/:/etc/slurm/:rw - /container/volumes/slurm/:/etc/slurm/:rw
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
- slurm-shared:/shared/:rw - slurm-shared:/shared/:rw
slurm_nodes_all: | # add execute nodes slurm_nodes_all: | # add execute nodes
{% for i in range(1, 4) -%} {% for i in range(1, num_nodes+1) -%}
{% set _ = slurm_nodes.extend([ {% set _ = slurm_nodes.extend([
{'machine':'exec%s'|format(i), 'image': 'slurm-slurmd'}]) -%} {'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
{%- endfor %} {%- endfor %}
{{ slurm_nodes }} {{ slurm_nodes }}
loop: "{{slurm_nodes_all}}" loop: "{{slurm_nodes_all}}"