new startup for cobald containers
This commit is contained in:
13
play.yml
13
play.yml
@@ -53,10 +53,10 @@
|
||||
- htcondor-containered
|
||||
- htcondor
|
||||
|
||||
- hosts: slurm
|
||||
- hosts: slurm, cobald
|
||||
vars:
|
||||
container_privileged: True
|
||||
num_nodes: 3
|
||||
slurm_num_nodes: 10
|
||||
roles:
|
||||
- name: "setup docker"
|
||||
role: docker
|
||||
@@ -65,15 +65,12 @@
|
||||
role: slurm
|
||||
vars:
|
||||
slurm_user: slurm # or root
|
||||
num_nodes: "{{slurm_num_nodes}}"
|
||||
when: '"slurm" in group_names'
|
||||
tags: slurm
|
||||
|
||||
- hosts: cobald
|
||||
roles:
|
||||
- name: "setup docker"
|
||||
role: docker
|
||||
tags: docker
|
||||
- name: "install cobald"
|
||||
role: cobald
|
||||
vars:
|
||||
docker_network: slurm
|
||||
when: '"cobald" in group_names'
|
||||
tags: cobald
|
||||
|
18
roles/slurm/files/docker-init
Normal file
18
roles/slurm/files/docker-init
Normal file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
# Minimal init for the container: starts every regular file found in
# /etc/docker-init.d/ as a background job and waits for all of them.
# On SIGINT/SIGTERM the direct children of every job, and the job itself,
# are signalled before this script exits.

pids=""          # space-separated PIDs of the started jobs
terminating=""   # set to 1 by the signal handler

# Signal handler: stop every started job together with its direct children.
function trp_term(){
    terminating=1
    # Left unquoted on purpose so the logged line keeps its original format.
    echo pkill -P $pids
    for j in $pids ; do
        pkill -P "$j"
        kill -SIGTERM "$j"
    done
}
trap trp_term SIGINT SIGTERM

for i in /etc/docker-init.d/* ; do
    # Skip anything that is not a regular file (this also covers the
    # unexpanded glob pattern of an empty directory) instead of abandoning
    # the scripts that sort after it.
    [ -f "$i" ] || continue
    "$i" &
    pids="$pids $!"
done

# $pids is deliberately unquoted: it is a whitespace-separated PID list.
wait $pids

# A trapped signal makes `wait` return immediately, before the jobs have
# exited. Wait once more so they are reaped before this script ends.
# Errors are silenced because some PIDs may already have been collected.
if [ -n "$terminating" ] ; then
    wait $pids 2>/dev/null
fi

# TODO: call start scripts like "foo.sh start" and "foo.sh stop" to avoid pkill
|
@@ -1,4 +1,4 @@
|
||||
FROM docker.io/library/centos:7 as base
|
||||
FROM docker.io/library/centos:7
|
||||
|
||||
RUN yum install -y epel-release && \
|
||||
yum install -y slurm && \
|
||||
@@ -7,6 +7,7 @@ RUN yum install -y epel-release && \
|
||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
# FIXME
|
||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
||||
|
||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
||||
@@ -23,10 +24,15 @@ RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
||||
RUN yum install -y slurm-slurmctld && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
COPY docker-init /usr/local/sbin/docker-init
|
||||
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
|
||||
# FIXME
|
||||
COPY start-scripts/10-munge /etc/docker-init.d/
|
||||
COPY start-scripts/20-slurmctld /etc/docker-init.d/
|
||||
RUN chmod 755 /etc/docker-init.d/*
|
||||
|
||||
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
||||
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
||||
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
||||
|
||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
||||
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
|
||||
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
||||
CMD /usr/local/sbin/docker-init
|
||||
|
@@ -7,6 +7,7 @@ RUN yum install -y epel-release && \
|
||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
# FIXME
|
||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
||||
|
||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
||||
@@ -23,10 +24,15 @@ RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
||||
RUN yum install -y slurm-slurmd && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
COPY docker-init /usr/local/sbin/docker-init
|
||||
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
|
||||
# FIXME
|
||||
COPY start-scripts/10-munge /etc/docker-init.d/
|
||||
COPY start-scripts/30-slurmd /etc/docker-init.d/
|
||||
RUN chmod 755 /etc/docker-init.d/*
|
||||
|
||||
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
||||
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
||||
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
||||
|
||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
||||
slurmd -D 2>/dev/null 1>/dev/null & \
|
||||
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
||||
CMD /usr/local/sbin/docker-init
|
||||
|
2
roles/slurm/files/start-scripts/10-munge
Normal file
2
roles/slurm/files/start-scripts/10-munge
Normal file
@@ -0,0 +1,2 @@
|
||||
#!/bin/sh
# Start script for munge: runs "munged -F" as the "munge" user.
# `exec` replaces this shell with su, so no extra shell process stays around.
exec su --shell /bin/sh --command "munged -F" munge
|
4
roles/slurm/files/start-scripts/20-slurmctld
Normal file
4
roles/slurm/files/start-scripts/20-slurmctld
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/sh
# Start script for the Slurm controller.
# Launches "slurmctld -D" in the background with its stdout/stderr discarded,
# then follows the controller and scheduler log files in the foreground.
# `tail --pid` ends once the daemon process is gone, which ends this script.
#
# Environment:
#   slurmuser             user to run slurmctld as; root when unset, which is
#                         what a bare `su` selected before
#   SLURMCTLD_LOG_PATH    controller log file to follow
#   SLURM_SCHED_LOG_PATH  scheduler log file to follow
# The log path fallbacks are the values the image sets through ENV.
su -s /bin/sh -c "slurmctld -D" "${slurmuser:-root}" >/dev/null 2>&1 &
tail --retry --pid "$!" -f \
    "${SLURMCTLD_LOG_PATH:-/var/log/slurm/slurmctld.log}" \
    "${SLURM_SCHED_LOG_PATH:-/var/log/slurm/slurmsched.log}"
|
||||
|
4
roles/slurm/files/start-scripts/30-slurmd
Normal file
4
roles/slurm/files/start-scripts/30-slurmd
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/sh
# Start script for the Slurm node daemon.
# Launches "slurmd -D" in the background with its stdout/stderr discarded,
# then follows the slurmd and scheduler log files in the foreground.
# `tail --pid` ends once the daemon process is gone, which ends this script.
#
# Environment:
#   SLURMD_LOG_PATH       slurmd log file to follow
#   SLURM_SCHED_LOG_PATH  scheduler log file to follow
# The fallbacks are the values the image sets through ENV.
slurmd -D >/dev/null 2>&1 &
tail --retry --pid "$!" -f \
    "${SLURMD_LOG_PATH:-/var/log/slurm/slurmd.log}" \
    "${SLURM_SCHED_LOG_PATH:-/var/log/slurm/slurmsched.log}"
|
||||
|
@@ -1,31 +1,50 @@
|
||||
- file:
|
||||
path: "/container/docker-images/{{item}}"
|
||||
path: "/container/docker-images/slurm"
|
||||
state: directory
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
|
||||
- copy:
|
||||
src: "{{item}}.Dockerfile"
|
||||
dest: "/container/docker-images/{{item}}/Dockerfile"
|
||||
- copy: # FIXME: template
|
||||
src: "{{image.name}}.Dockerfile"
|
||||
dest: "/container/docker-images/slurm/{{image.name}}.Dockerfile"
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
register: slurm_cp_dockerfile
|
||||
|
||||
- copy:
|
||||
src: "entrypoint.sh"
|
||||
dest: "/container/docker-images/{{item}}/entrypoint.sh"
|
||||
- name: copy entrypoint and docker-init
|
||||
copy: # FIXME: swap out
|
||||
src: "{{item}}"
|
||||
dest: "/container/docker-images/slurm/{{item}}"
|
||||
owner: root
|
||||
group: root
|
||||
mode: u=rwx,g=rx,o=rx
|
||||
loop:
|
||||
- entrypoint.sh
|
||||
- docker-init
|
||||
register: slurm_cp_entrypt
|
||||
|
||||
- name: copy startup scripts
|
||||
copy:
|
||||
src: "start-scripts/"
|
||||
dest: "/container/docker-images/slurm/start-scripts/"
|
||||
owner: root
|
||||
group: root
|
||||
mode: u=rwx,g=rx,o=rx
|
||||
register: slurm_cp_stscrs
|
||||
|
||||
- set_fact:
|
||||
slurm_image_files_changed: "{{ (slurm_image_files_changed | default(False))
|
||||
or slurm_cp_entrypt.changed or slurm_cp_stscrs.changed }}"
|
||||
|
||||
- docker_image:
|
||||
name: "slurm-{{item}}"
|
||||
name: "slurm"
|
||||
tag: "{{image.name}}"
|
||||
# pull: False
|
||||
build:
|
||||
pull: False
|
||||
path: "/container/docker-images/{{item}}"
|
||||
# target: "{{item}}" # unsupported on old docker-py versions as in el7
|
||||
path: "/container/docker-images/slurm/"
|
||||
dockerfile: "{{image.name}}.Dockerfile"
|
||||
# target: "{{image.name}}" # unsupported on old docker-py version as in el7
|
||||
source: build
|
||||
force_source: "{{slurm_cp_dockerfile.changed or slurm_cp_entrypt.changed}}"
|
||||
force_source: "{{slurm_cp_dockerfile.changed or slurm_image_files_changed}}"
|
||||
|
||||
|
@@ -5,8 +5,17 @@
|
||||
|
||||
- include_tasks: dockerimage.yml
|
||||
loop:
|
||||
- slurmctld
|
||||
- slurmd
|
||||
- name: slurmctld
|
||||
start:
|
||||
- 10-munge
|
||||
- 20-slurmctld
|
||||
- name: slurmd
|
||||
start:
|
||||
- 10-munge
|
||||
- 30-slurmd
|
||||
loop_control:
|
||||
loop_var: image
|
||||
label: "{{ image.name }}"
|
||||
|
||||
- name: generate munge key
|
||||
shell:
|
||||
@@ -63,9 +72,10 @@
|
||||
- set_fact:
|
||||
slurm_nodes: # default nodes: controller and submit machine
|
||||
- machine: ctl
|
||||
image: slurm-slurmctld
|
||||
image: slurm:slurmctld
|
||||
exposed_ports: [ "6817:6817/tcp" ]
|
||||
- machine: submit1
|
||||
image: slurm-slurmd
|
||||
image: slurm:slurmd
|
||||
extra_mounts:
|
||||
- "/home/{{unpriv_user}}/job3/:/mnt/:rw"
|
||||
tags: [ slurm-config ]
|
||||
@@ -80,6 +90,7 @@
|
||||
hostname: "slurm-{{item.machine}}"
|
||||
domainname: "slurm.local"
|
||||
volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
|
||||
ports: "{{ item.exposed_ports | default([]) }}"
|
||||
networks:
|
||||
- name: "slurm"
|
||||
env:
|
||||
@@ -90,15 +101,16 @@
|
||||
cleanup: True
|
||||
privileged: "{{ container_privileged | bool }}"
|
||||
networks_cli_compatible: True
|
||||
interactive: True
|
||||
vars:
|
||||
default_mounts:
|
||||
- /container/volumes/slurm/:/etc/slurm/:rw
|
||||
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
||||
- slurm-shared:/shared/:rw
|
||||
slurm_nodes_all: | # add execute nodes
|
||||
{% for i in range(1, 4) -%}
|
||||
{% for i in range(1, num_nodes+1) -%}
|
||||
{% set _ = slurm_nodes.extend([
|
||||
{'machine':'exec%s'|format(i), 'image': 'slurm-slurmd'}]) -%}
|
||||
{'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
|
||||
{%- endfor %}
|
||||
{{ slurm_nodes }}
|
||||
loop: "{{slurm_nodes_all}}"
|
||||
|
Reference in New Issue
Block a user