new startup for cobald containers
This commit is contained in:
1
inv.yml
1
inv.yml
@@ -19,6 +19,7 @@ all:
|
|||||||
slurm:
|
slurm:
|
||||||
hosts:
|
hosts:
|
||||||
ed-c7-1:
|
ed-c7-1:
|
||||||
|
ed-c7-2:
|
||||||
cobald:
|
cobald:
|
||||||
hosts:
|
hosts:
|
||||||
ed-c7-1:
|
ed-c7-1:
|
||||||
|
13
play.yml
13
play.yml
@@ -53,10 +53,10 @@
|
|||||||
- htcondor-containered
|
- htcondor-containered
|
||||||
- htcondor
|
- htcondor
|
||||||
|
|
||||||
- hosts: slurm
|
- hosts: slurm, cobald
|
||||||
vars:
|
vars:
|
||||||
container_privileged: True
|
container_privileged: True
|
||||||
num_nodes: 3
|
slurm_num_nodes: 10
|
||||||
roles:
|
roles:
|
||||||
- name: "setup docker"
|
- name: "setup docker"
|
||||||
role: docker
|
role: docker
|
||||||
@@ -65,15 +65,12 @@
|
|||||||
role: slurm
|
role: slurm
|
||||||
vars:
|
vars:
|
||||||
slurm_user: slurm # or root
|
slurm_user: slurm # or root
|
||||||
|
num_nodes: "{{slurm_num_nodes}}"
|
||||||
|
when: '"slurm" in group_names'
|
||||||
tags: slurm
|
tags: slurm
|
||||||
|
|
||||||
- hosts: cobald
|
|
||||||
roles:
|
|
||||||
- name: "setup docker"
|
|
||||||
role: docker
|
|
||||||
tags: docker
|
|
||||||
- name: "install cobald"
|
- name: "install cobald"
|
||||||
role: cobald
|
role: cobald
|
||||||
vars:
|
vars:
|
||||||
docker_network: slurm
|
docker_network: slurm
|
||||||
|
when: '"cobald" in group_names'
|
||||||
tags: cobald
|
tags: cobald
|
||||||
|
18
roles/slurm/files/docker-init
Normal file
18
roles/slurm/files/docker-init
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/bin/bash
# Container init: launches every start script found in /etc/docker-init.d in
# the background and then stays in the foreground until all of them have
# exited. SIGINT and SIGTERM are forwarded to each started script and to the
# direct children of that script.

pids=""
interrupted=0

# Signal handler: terminate each started script together with its direct
# children (a start script may have put its daemon into the background).
function trp_term(){
    interrupted=1
    echo pkill -P $pids
    for j in $pids ; do
        pkill -P "$j"
        kill -SIGTERM "$j"
    done
}
trap trp_term SIGINT SIGTERM

for i in /etc/docker-init.d/* ; do
    # Skip anything that is not a regular file. This also covers the literal
    # pattern left behind by an unmatched glob, and unlike `break` it does not
    # drop the start scripts that sort after the skipped entry.
    [ -f "$i" ] || continue
    "$i" &
    pids="$pids $!"
done

# A trapped signal makes `wait` return right away (status > 128). Wait again
# in that case so the children that were just signalled can finish shutting
# down before this script, and with it the container, exits.
while : ; do
    interrupted=0
    wait $pids
    [ "$interrupted" -eq 1 ] || break
done

# TODO: call start scripts like "foo.sh start" and "foo.sh stop" to avoid pkill
|
@@ -1,4 +1,4 @@
|
|||||||
FROM docker.io/library/centos:7 as base
|
FROM docker.io/library/centos:7
|
||||||
|
|
||||||
RUN yum install -y epel-release && \
|
RUN yum install -y epel-release && \
|
||||||
yum install -y slurm && \
|
yum install -y slurm && \
|
||||||
@@ -7,6 +7,7 @@ RUN yum install -y epel-release && \
|
|||||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
||||||
yum clean all && rm -rf /var/cache/yum
|
yum clean all && rm -rf /var/cache/yum
|
||||||
|
|
||||||
|
# FIXME
|
||||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
||||||
|
|
||||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
||||||
@@ -23,10 +24,15 @@ RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
|||||||
RUN yum install -y slurm-slurmctld && \
|
RUN yum install -y slurm-slurmctld && \
|
||||||
yum clean all && rm -rf /var/cache/yum
|
yum clean all && rm -rf /var/cache/yum
|
||||||
|
|
||||||
|
COPY docker-init /usr/local/sbin/docker-init
|
||||||
|
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
|
||||||
|
# FIXME
|
||||||
|
COPY start-scripts/10-munge /etc/docker-init.d/
|
||||||
|
COPY start-scripts/20-slurmctld /etc/docker-init.d/
|
||||||
|
RUN chmod 755 /etc/docker-init.d/*
|
||||||
|
|
||||||
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
||||||
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
||||||
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
||||||
|
|
||||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
CMD /usr/local/sbin/docker-init
|
||||||
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
|
|
||||||
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
|
||||||
|
@@ -7,6 +7,7 @@ RUN yum install -y epel-release && \
|
|||||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
||||||
yum clean all && rm -rf /var/cache/yum
|
yum clean all && rm -rf /var/cache/yum
|
||||||
|
|
||||||
|
# FIXME
|
||||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
||||||
|
|
||||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
||||||
@@ -23,10 +24,15 @@ RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
|||||||
RUN yum install -y slurm-slurmd && \
|
RUN yum install -y slurm-slurmd && \
|
||||||
yum clean all && rm -rf /var/cache/yum
|
yum clean all && rm -rf /var/cache/yum
|
||||||
|
|
||||||
|
COPY docker-init /usr/local/sbin/docker-init
|
||||||
|
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
|
||||||
|
# FIXME
|
||||||
|
COPY start-scripts/10-munge /etc/docker-init.d/
|
||||||
|
COPY start-scripts/30-slurmd /etc/docker-init.d/
|
||||||
|
RUN chmod 755 /etc/docker-init.d/*
|
||||||
|
|
||||||
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
||||||
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
||||||
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
||||||
|
|
||||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
CMD /usr/local/sbin/docker-init
|
||||||
slurmd -D 2>/dev/null 1>/dev/null & \
|
|
||||||
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
|
||||||
|
2
roles/slurm/files/start-scripts/10-munge
Normal file
2
roles/slurm/files/start-scripts/10-munge
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
#!/bin/sh
# Start script for munged: replaces this shell (exec) with su, which runs
# "munged -F" through /bin/sh as the munge user, so the PID of this script
# is the PID of the su process itself.
exec su -s /bin/sh -c 'munged -F' munge
|
4
roles/slurm/files/start-scripts/20-slurmctld
Normal file
4
roles/slurm/files/start-scripts/20-slurmctld
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
# Start script for slurmctld: runs "slurmctld -D" through su in the
# background with its stdout/stderr discarded, then follows the Slurm log
# files in the foreground.

# NOTE(review): ${slurmuser} is taken from the environment and deliberately
# left unquoted as before; if it is empty su receives no user argument at all
# (presumably falling back to root) -- confirm it is set at container runtime.
su -s /bin/sh -c "slurmctld -D" ${slurmuser} >/dev/null 2>&1 &

# --pid ties tail to the su process started above: once that process is gone
# tail exits and this script ends with it. --retry keeps trying log files
# that cannot be opened yet.
tail --retry --pid "$!" -f "${SLURMCTLD_LOG_PATH}" "${SLURM_SCHED_LOG_PATH}"
|
||||||
|
|
4
roles/slurm/files/start-scripts/30-slurmd
Normal file
4
roles/slurm/files/start-scripts/30-slurmd
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
# Start script for slurmd: runs "slurmd -D" in the background with its
# stdout/stderr discarded, then follows the Slurm log files in the foreground.
slurmd -D >/dev/null 2>&1 &

# --pid ties tail to the slurmd process started above: once that process is
# gone tail exits and this script ends with it. --retry keeps trying log
# files that cannot be opened yet.
tail --retry --pid "$!" -f "${SLURMD_LOG_PATH}" "${SLURM_SCHED_LOG_PATH}"
|
||||||
|
|
@@ -1,31 +1,50 @@
|
|||||||
- file:
|
- file:
|
||||||
path: "/container/docker-images/{{item}}"
|
path: "/container/docker-images/slurm"
|
||||||
state: directory
|
state: directory
|
||||||
owner: "{{unpriv_user}}"
|
owner: "{{unpriv_user}}"
|
||||||
group: docker
|
group: docker
|
||||||
|
|
||||||
- copy:
|
- copy: # FIXME: template
|
||||||
src: "{{item}}.Dockerfile"
|
src: "{{image.name}}.Dockerfile"
|
||||||
dest: "/container/docker-images/{{item}}/Dockerfile"
|
dest: "/container/docker-images/slurm/{{image.name}}.Dockerfile"
|
||||||
owner: "{{unpriv_user}}"
|
owner: "{{unpriv_user}}"
|
||||||
group: docker
|
group: docker
|
||||||
register: slurm_cp_dockerfile
|
register: slurm_cp_dockerfile
|
||||||
|
|
||||||
- copy:
|
- name: copy entrypoint and docker-init
|
||||||
src: "entrypoint.sh"
|
copy: # FIXME: swap out
|
||||||
dest: "/container/docker-images/{{item}}/entrypoint.sh"
|
src: "{{item}}"
|
||||||
|
dest: "/container/docker-images/slurm/{{item}}"
|
||||||
owner: root
|
owner: root
|
||||||
group: root
|
group: root
|
||||||
mode: u=rwx,g=rx,o=rx
|
mode: u=rwx,g=rx,o=rx
|
||||||
|
loop:
|
||||||
|
- entrypoint.sh
|
||||||
|
- docker-init
|
||||||
register: slurm_cp_entrypt
|
register: slurm_cp_entrypt
|
||||||
|
|
||||||
|
- name: copy startup scripts
|
||||||
|
copy:
|
||||||
|
src: "start-scripts/"
|
||||||
|
dest: "/container/docker-images/slurm/start-scripts/"
|
||||||
|
owner: root
|
||||||
|
group: root
|
||||||
|
mode: u=rwx,g=rx,o=rx
|
||||||
|
register: slurm_cp_stscrs
|
||||||
|
|
||||||
|
- set_fact:
|
||||||
|
slurm_image_files_changed: "{{ (slurm_image_files_changed | default(False))
|
||||||
|
or slurm_cp_entrypt.changed or slurm_cp_stscrs.changed }}"
|
||||||
|
|
||||||
- docker_image:
|
- docker_image:
|
||||||
name: "slurm-{{item}}"
|
name: "slurm"
|
||||||
|
tag: "{{image.name}}"
|
||||||
# pull: False
|
# pull: False
|
||||||
build:
|
build:
|
||||||
pull: False
|
pull: False
|
||||||
path: "/container/docker-images/{{item}}"
|
path: "/container/docker-images/slurm/"
|
||||||
# target: "{{item}}" # unsupported on old docker-py versions as in el7
|
dockerfile: "{{image.name}}.Dockerfile"
|
||||||
|
# target: "{{image.name}}" # unsupported on old docker-py version as in el7
|
||||||
source: build
|
source: build
|
||||||
force_source: "{{slurm_cp_dockerfile.changed or slurm_cp_entrypt.changed}}"
|
force_source: "{{slurm_cp_dockerfile.changed or slurm_image_files_changed}}"
|
||||||
|
|
||||||
|
@@ -5,8 +5,17 @@
|
|||||||
|
|
||||||
- include_tasks: dockerimage.yml
|
- include_tasks: dockerimage.yml
|
||||||
loop:
|
loop:
|
||||||
- slurmctld
|
- name: slurmctld
|
||||||
- slurmd
|
start:
|
||||||
|
- 10-munge
|
||||||
|
- 20-slurmctld
|
||||||
|
- name: slurmd
|
||||||
|
start:
|
||||||
|
- 10-munge
|
||||||
|
- 30-slurmd
|
||||||
|
loop_control:
|
||||||
|
loop_var: image
|
||||||
|
label: "{{ image.name }}"
|
||||||
|
|
||||||
- name: generate munge key
|
- name: generate munge key
|
||||||
shell:
|
shell:
|
||||||
@@ -63,9 +72,10 @@
|
|||||||
- set_fact:
|
- set_fact:
|
||||||
slurm_nodes: # default nodes: controller and submit machine
|
slurm_nodes: # default nodes: controller and submit machine
|
||||||
- machine: ctl
|
- machine: ctl
|
||||||
image: slurm-slurmctld
|
image: slurm:slurmctld
|
||||||
|
exposed_ports: [ "6817:6817/tcp" ]
|
||||||
- machine: submit1
|
- machine: submit1
|
||||||
image: slurm-slurmd
|
image: slurm:slurmd
|
||||||
extra_mounts:
|
extra_mounts:
|
||||||
- "/home/{{unpriv_user}}/job3/:/mnt/:rw"
|
- "/home/{{unpriv_user}}/job3/:/mnt/:rw"
|
||||||
tags: [ slurm-config ]
|
tags: [ slurm-config ]
|
||||||
@@ -80,6 +90,7 @@
|
|||||||
hostname: "slurm-{{item.machine}}"
|
hostname: "slurm-{{item.machine}}"
|
||||||
domainname: "slurm.local"
|
domainname: "slurm.local"
|
||||||
volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
|
volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
|
||||||
|
ports: "{{ item.exposed_ports | default([]) }}"
|
||||||
networks:
|
networks:
|
||||||
- name: "slurm"
|
- name: "slurm"
|
||||||
env:
|
env:
|
||||||
@@ -90,15 +101,16 @@
|
|||||||
cleanup: True
|
cleanup: True
|
||||||
privileged: "{{ container_privileged | bool }}"
|
privileged: "{{ container_privileged | bool }}"
|
||||||
networks_cli_compatible: True
|
networks_cli_compatible: True
|
||||||
|
interactive: True
|
||||||
vars:
|
vars:
|
||||||
default_mounts:
|
default_mounts:
|
||||||
- /container/volumes/slurm/:/etc/slurm/:rw
|
- /container/volumes/slurm/:/etc/slurm/:rw
|
||||||
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
||||||
- slurm-shared:/shared/:rw
|
- slurm-shared:/shared/:rw
|
||||||
slurm_nodes_all: | # add execute nodes
|
slurm_nodes_all: | # add execute nodes
|
||||||
{% for i in range(1, 4) -%}
|
{% for i in range(1, num_nodes+1) -%}
|
||||||
{% set _ = slurm_nodes.extend([
|
{% set _ = slurm_nodes.extend([
|
||||||
{'machine':'exec%s'|format(i), 'image': 'slurm-slurmd'}]) -%}
|
{'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
|
||||||
{%- endfor %}
|
{%- endfor %}
|
||||||
{{ slurm_nodes }}
|
{{ slurm_nodes }}
|
||||||
loop: "{{slurm_nodes_all}}"
|
loop: "{{slurm_nodes_all}}"
|
||||||
|
Reference in New Issue
Block a user