WIP: cobald container containing and using slurm
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
[ -f /usr/local/lib/cobaldmodules/setup.py -a \
|
||||
-d /usr/local/lib/cobaldmodules/cobaldmodules ] && \
|
||||
pip3 install --no-deps --editable /usr/local/lib/cobaldmodules
|
||||
# Run the init scripts found in /usr/local/lib/entrypoints.d/ in the order the
# shell expands the glob. Processing stops at the first entry that is not a
# regular file (this includes the literal pattern left over when the directory
# is empty or missing) or at the first script that exits non-zero; the main
# command below is still started afterwards.
for script in /usr/local/lib/entrypoints.d/* ; do
  # Quoted so that paths containing whitespace or glob characters are tested
  # and executed as a single word.
  [ -f "$script" ] || break
  if ! /bin/sh "$script" ; then
    # Same control flow as before (stop running further init scripts), but
    # leave a trace on stderr instead of failing silently.
    printf 'entrypoint: %s failed, skipping remaining init scripts\n' "$script" >&2
    break
  fi
done
|
||||
|
||||
exec "${@:-/bin/bash}"
|
||||
|
5
roles/cobald/files/init-cobaldmodules.sh
Normal file
5
roles/cobald/files/init-cobaldmodules.sh
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
# Install the cobaldmodules package in editable mode when its source tree is
# present under /usr/local/lib/cobaldmodules (a setup.py plus the package
# directory).
#
# Exit status: 0 when there is nothing to install, otherwise the exit status
# of pip3. An `if` is used instead of `[ ... ] && pip3 ...` because the
# short-circuit form makes the script exit 1 whenever the tree is absent, and
# an entrypoint loop of the form `/bin/sh "$script" || break` then treats that
# as a failure and skips all remaining init scripts.

# Two separate tests joined with && replace the obsolescent `test -a` operator.
if [ -f /usr/local/lib/cobaldmodules/setup.py ] &&
   [ -d /usr/local/lib/cobaldmodules/cobaldmodules ] ; then
  pip3 install --no-deps --editable /usr/local/lib/cobaldmodules
fi
|
@@ -1,33 +1,41 @@
|
||||
- include_vars: cobald-slurm.yml
|
||||
when: cobald_slurm | default(False)
|
||||
|
||||
- file:
|
||||
path: "/container/{{item}}/cobald/"
|
||||
path: "/container/{{item.name}}/cobald{{item.pfx|default('')}}/"
|
||||
state: directory
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
loop:
|
||||
- docker-images
|
||||
- volumes
|
||||
- name: docker-images
|
||||
pfx: ".{{cobald_image_tag|default('latest')}}"
|
||||
- name: volumes
|
||||
|
||||
- copy:
|
||||
- template:
|
||||
src: cobald.Dockerfile
|
||||
dest: /container/docker-images/cobald/Dockerfile
|
||||
dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/Dockerfile"
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
register: cobald_cp_dockerfile
|
||||
|
||||
- copy:
|
||||
src: cobald-entrypoint.sh
|
||||
dest: /container/docker-images/cobald/cobald-entrypoint.sh
|
||||
src: "{{item}}"
|
||||
dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/{{item}}"
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
mode: 0755
|
||||
with_items:
|
||||
- cobald-entrypoint.sh
|
||||
- init-cobaldmodules.sh
|
||||
register: cobald_cp_files
|
||||
|
||||
- docker_image:
|
||||
name: "cobald"
|
||||
tag: "{{cobald_image_tag|default('latest')}}"
|
||||
# pull: False
|
||||
build:
|
||||
pull: False
|
||||
path: "/container/docker-images/cobald/"
|
||||
path: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/"
|
||||
source: build
|
||||
force_source: "{{cobald_cp_dockerfile.changed or cobald_cp_files.changed}}"
|
||||
|
||||
@@ -39,7 +47,7 @@
|
||||
when: False
|
||||
|
||||
- docker_network:
|
||||
name: "{{docker_network}}"
|
||||
name: "{{cobald_docker_network}}" # FIXME
|
||||
state: present
|
||||
|
||||
# docker run -v $(pwd)/cobald-config-host:/etc/cobald -v $(pwd)/cobald:/cobald --rm -it cobald bash
|
||||
@@ -82,7 +90,7 @@
|
||||
|
||||
- name: run pip install
|
||||
docker_container:
|
||||
image: cobald
|
||||
image: "cobald:{{cobald_image_tag|default('latest')}}"
|
||||
name: "cobald-src-{{item.name}}-install"
|
||||
volumes:
|
||||
- "~{{unpriv_user}}/{{item.name}}-src:/usr/local/src/{{item.name}}:rw"
|
||||
@@ -102,29 +110,34 @@
|
||||
|
||||
- import_tasks: telegraf.yml
|
||||
|
||||
- docker_container_info:
|
||||
name: cobald
|
||||
register: cobald_container_info
|
||||
- name: get cobald hostname
|
||||
include_role:
|
||||
name: cobald_facts
|
||||
when: cobald_container_hostname is not defined
|
||||
|
||||
- docker_container:
|
||||
name: cobald
|
||||
image: cobald
|
||||
hostname: |-
|
||||
{{cobald_container_info.container.Config.Hostname | default('cobald-'+
|
||||
lookup('password', '/dev/null chars=ascii_lowercase length=6')) }}
|
||||
domainname: cobald.local
|
||||
volumes:
|
||||
- name: run cobald container
|
||||
docker_container:
|
||||
name: "{{ container_name | default('cobald') }}"
|
||||
image: "cobald:{{cobald_image_tag|default('latest')}}"
|
||||
hostname: "{{cobald_container_hostname}}"
|
||||
domainname: "{{ cobald_domainname | default('cobald.local')}}"
|
||||
volumes: "{{default_mounts + cobald_mounts }}"
|
||||
networks:
|
||||
- name: "{{cobald_docker_network}}"
|
||||
networks_cli_compatible: True
|
||||
# env:
|
||||
# slurmuser: "{{slurm_user}}"
|
||||
# privileged: "{{ container_privileged | bool }}"
|
||||
state: started
|
||||
detach: True
|
||||
cleanup: True
|
||||
interactive: True
|
||||
# command: python3 -m cobald.daemon /etc/cobald/config.yaml
|
||||
vars:
|
||||
default_mounts: "{{cobald_slurm_mounts | default([])}}"
|
||||
cobald_mounts:
|
||||
- "~{{unpriv_user}}/cobald:/etc/cobald"
|
||||
# - /container/volumes/cobald:/etc/cobald:ro
|
||||
- "~{{unpriv_user}}/cobald/modules:/usr/local/src/cobaldmodules"
|
||||
- "~{{unpriv_user}}/cobald-src:/usr/local/src/cobald:ro"
|
||||
- "~{{unpriv_user}}/tardis-src:/usr/local/src/tardis:ro"
|
||||
networks:
|
||||
- name: "{{docker_network}}"
|
||||
state: started
|
||||
detach: True
|
||||
cleanup: True
|
||||
interactive: True
|
||||
# command: /bin/bash
|
||||
# python3 -m cobald.daemon /etc/cobald/config.yaml
|
||||
command: python3 -m cobald.daemon /etc/cobald/config.yaml
|
||||
|
@@ -1,4 +1,4 @@
|
||||
FROM docker.io/library/centos:7
|
||||
FROM {{ cobald_docker_base_image | default("docker.io/library/centos:7") }}
|
||||
|
||||
RUN yum update -y && \
|
||||
yum install -y python3 git && pip3 install --upgrade pip && \
|
||||
@@ -41,11 +41,20 @@ VOLUME /usr/local/src/cobaldmodules
|
||||
|
||||
VOLUME /etc/cobald
|
||||
|
||||
COPY cobald-entrypoint.sh /usr/local/sbin/docker-entrypoint.sh
|
||||
RUN mkdir -p /usr/local/lib/entrypoints.d/
|
||||
|
||||
RUN chmod 755 /usr/local/sbin/docker-entrypoint.sh
|
||||
COPY init-cobaldmodules.sh /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh
|
||||
|
||||
ENTRYPOINT [ "/usr/local/sbin/docker-entrypoint.sh" ]
|
||||
RUN chmod 755 /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh
|
||||
|
||||
RUN echo -e "#!/bin/sh\npython3 -m cobald.daemon /etc/cobald/config.yaml" >> /etc/docker-init.d/70-cobald && chmod 755 /etc/docker-init.d/70-cobald
|
||||
|
||||
{% if cobald_docker_default_command | default(True) -%}
|
||||
COPY cobald-entrypoint.sh /usr/local/sbin/cobald-entrypoint.sh
|
||||
|
||||
RUN chmod 755 /usr/local/sbin/cobald-entrypoint.sh
|
||||
|
||||
ENTRYPOINT [ "/usr/local/sbin/cobald-entrypoint.sh" ]
|
||||
|
||||
RUN yum -y install iproute &&\
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
@@ -54,4 +63,6 @@ USER cobald
|
||||
|
||||
STOPSIGNAL SIGINT
|
||||
|
||||
CMD "python3 -m cobald.daemon /etc/cobald/config.yaml"
|
||||
# CMD "python3 -m cobald.daemon /etc/cobald/config.yaml"
|
||||
# Default command: run the cobald launcher script. The RUN step earlier in this
# Dockerfile writes that script to /etc/docker-init.d/70-cobald, so the same
# path must be used here.
CMD /etc/docker-init.d/70-cobald
|
||||
{%- endif %}
|
12
roles/cobald/vars/cobald-slurm.yml
Normal file
12
roles/cobald/vars/cobald-slurm.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
cobald_image_tag: slurm
|
||||
cobald_docker_base_image: "{{slurm.base_image}}"
|
||||
cobald_docker_default_command: False
|
||||
cobald_docker_network: "{{slurm.network}}"
|
||||
cobald_domainname: "{{slurm.domain}}"
|
||||
cobald_slurm_mounts: "{{slurm.mounts}}"
|
||||
#- /container/volumes/slurm/:/etc/slurm/:rw
|
||||
##- "{{slurm_cfg_path | mandatory}}:/etc/slurm/:rw"
|
||||
#- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
||||
## - "{{slurm_munge_path | mandatory}}:/etc/munge/munge.key:rw"
|
||||
#- slurm-shared:/shared/:rw
|
||||
## - "{{slurm_shared_path | mandatory}}:{{slurm_shared_target | default('/shared')}}:rw"
|
1
roles/cobald/vars/main.yml
Normal file
1
roles/cobald/vars/main.yml
Normal file
@@ -0,0 +1 @@
|
||||
cobald_docker_network: "{{docker_network}}"
|
11
roles/cobald_facts/tasks/main.yml
Normal file
11
roles/cobald_facts/tasks/main.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
- block:
|
||||
- docker_container_info:
|
||||
name: "{{ container_name | mandatory }}"
|
||||
register: cobald_container_info
|
||||
|
||||
- set_fact:
|
||||
cobald_container_hostname: |-
|
||||
{{cobald_container_info.container.Config.Hostname | default('cobald-'+
|
||||
lookup('password', '/dev/null chars=ascii_lowercase length=6')) }}
|
||||
when: cobald_container_hostname is not defined
|
||||
|
@@ -3,3 +3,6 @@ slurm_user: slurm
|
||||
slurm_log_path_ctld: /var/log/slurm/slurmctld.log
|
||||
slurm_log_path_d: /var/log/slurm/slurmd.log
|
||||
slurm_log_path_sched: /var/log/slurm/slurmsched.log
|
||||
slurm_prefix: slurm
|
||||
slurm_domain: slurm.local
|
||||
docker_network: slurm
|
||||
|
@@ -1,3 +1,3 @@
|
||||
- name: reconfigure slurm
|
||||
command:
|
||||
cmd: docker container exec -it slurm-ctl scontrol reconfigure
|
||||
shell:
|
||||
# Try a live reconfigure first; only if that fails, restart the controller
# container and reconfigure again. The fallback is grouped with { ...; }
# because `a || b && c` is evaluated as `(a || b) && c` by the shell, which
# would run the second reconfigure even after the first one succeeded.
cmd: "docker container exec -it {{slurm_prefix}}-ctl scontrol reconfigure || { docker container restart {{slurm_prefix}}-ctl && docker container exec -it {{slurm_prefix}}-ctl scontrol reconfigure; }"
|
||||
|
@@ -5,7 +5,7 @@
|
||||
|
||||
- include_role:
|
||||
name: slurm_dockerimage
|
||||
loop:
|
||||
loop: # FIXME: default(omit)!
|
||||
- name: slurmctld
|
||||
dockerfile: "{{ lookup('file', 'slurmctld.Dockerfile') }}"
|
||||
files:
|
||||
@@ -59,12 +59,14 @@
|
||||
loop:
|
||||
- slurm.conf
|
||||
- cgroup.conf
|
||||
vars:
|
||||
alloc_nodes: "{{ [ slurm_prefix+'-submit1' ] + extra_nodes | default([])}}"
|
||||
notify: reconfigure slurm
|
||||
tags: [ slurm-config ]
|
||||
|
||||
- name: "create docker network to make service discovery work"
|
||||
docker_network:
|
||||
name: slurm
|
||||
name: "{{ docker_network }}"
|
||||
state: present
|
||||
|
||||
- name: "create docker volume for shared access between nodes"
|
||||
@@ -73,7 +75,7 @@
|
||||
state: present
|
||||
|
||||
- set_fact:
|
||||
slurm_nodes: # default nodes: controller and submit machine
|
||||
slurm_nodes_std: # default nodes: controller and submit machine
|
||||
- machine: ctl
|
||||
image: slurm:slurmctld
|
||||
exposed_ports: [ "6817:6817/tcp" ]
|
||||
@@ -81,10 +83,18 @@
|
||||
image: slurm:slurmd
|
||||
extra_mounts:
|
||||
- "/home/{{unpriv_user}}/job3/:/mnt/:rw"
|
||||
# - machine: slurm-cobald
|
||||
# image: slurm-cobald
|
||||
# extra_mounts:
|
||||
# # TODO
|
||||
slurm_nodes_exec: | # extend range to execute nodes list
|
||||
{% set slurm_nodes_exec = slurm_nodes_exec | default([]) %}
|
||||
{% for i in range(1, num_nodes+1) -%}
|
||||
{% set _ = slurm_nodes_exec.extend([
|
||||
{'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
|
||||
{%- endfor %}
|
||||
{{ slurm_nodes_exec }}
|
||||
slurm_default_mounts:
|
||||
- /container/volumes/slurm/:/etc/slurm/:rw
|
||||
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
||||
- slurm-shared:/shared/:rw
|
||||
slurm_network: "{{docker_network}}"
|
||||
tags: [ slurm-config ]
|
||||
|
||||
# TODO: reserve some address using docker_network_info and assign as aux
|
||||
@@ -93,16 +103,16 @@
|
||||
|
||||
- name: run slurm docker containers
|
||||
docker_container:
|
||||
name: "slurm-{{item.machine}}"
|
||||
hostname: "slurm-{{item.machine}}"
|
||||
domainname: "slurm.local"
|
||||
volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
|
||||
name: "{{ slurm_prefix }}-{{ item.machine }}"
|
||||
hostname: "{{ slurm_prefix }}-{{ item.machine }}"
|
||||
domainname: "{{ slurm_domain }}"
|
||||
volumes: "{{ slurm_default_mounts + ( item.extra_mounts | default([]) ) }}"
|
||||
ports: "{{ item.exposed_ports | default([]) }}"
|
||||
networks:
|
||||
- name: "slurm"
|
||||
- name: "{{ slurm_network }}"
|
||||
env:
|
||||
slurmuser: "{{slurm_user}}"
|
||||
image: "{{item.image}}"
|
||||
slurmuser: "{{ slurm_user }}"
|
||||
image: "{{ item.image }}"
|
||||
state: started
|
||||
detach: True
|
||||
cleanup: True
|
||||
@@ -110,15 +120,17 @@
|
||||
networks_cli_compatible: True
|
||||
interactive: True
|
||||
vars:
|
||||
default_mounts:
|
||||
- /container/volumes/slurm/:/etc/slurm/:rw
|
||||
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
||||
- slurm-shared:/shared/:rw
|
||||
slurm_nodes_all: | # add execute nodes
|
||||
{% for i in range(1, num_nodes+1) -%}
|
||||
{% set _ = slurm_nodes.extend([
|
||||
{'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
|
||||
{%- endfor %}
|
||||
{{ slurm_nodes }}
|
||||
loop: "{{slurm_nodes_all}}"
|
||||
slurm_nodes_all: "{{ slurm_nodes_exec + slurm_nodes_std }}"
|
||||
loop: "{{ slurm_nodes_all }}"
|
||||
loop_control:
|
||||
label: "{{slurm_prefix}}-{{ item.machine }}"
|
||||
tags: [ slurm-config ]
|
||||
|
||||
- name: set facts to be used by other modules
|
||||
set_fact:
|
||||
slurm:
|
||||
user: "{{slurm_user}}"
|
||||
domain: "{{slurm_domain}}"
|
||||
base_image: "slurm:base"
|
||||
mounts: "{{slurm_default_mounts}}"
|
||||
network: "{{docker_network}}"
|
||||
|
@@ -164,5 +164,7 @@ SlurmSchedLogFile={{slurm_log_path_sched}}
|
||||
#
|
||||
# COMPUTE NODES
|
||||
NodeName=slurm-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN
|
||||
NodeName=slurm-submit1 CPUs=1 State=UNKNOWN
|
||||
PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] AllocNodes=slurm-submit1 Default=YES MaxTime=INFINITE State=UP
|
||||
{% for i in alloc_nodes -%}
|
||||
NodeName={{i}} State=UNKNOWN
|
||||
{% endfor %}
|
||||
PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] AllocNodes={{alloc_nodes | join(',')}} Default=YES MaxTime=INFINITE State=UP
|
||||
|
@@ -5,5 +5,3 @@ if [ -f "/etc/munge/munge.key" ] ; then
|
||||
chown munge:munge /etc/munge/munge.key
|
||||
chmod 600 /etc/munge/munge.key
|
||||
fi
|
||||
|
||||
exec "$@"
|
8
roles/slurm_dockerimage/files/entrypoint.sh
Normal file
8
roles/slurm_dockerimage/files/entrypoint.sh
Normal file
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
# Container entrypoint.
#
# Runs the init scripts found in /usr/local/lib/entrypoints.d/ in the order
# the shell expands the glob, then replaces this process with the command
# given as arguments, or with /bin/bash when no arguments were passed.
set -e

for script in /usr/local/lib/entrypoints.d/* ; do
  # Stop at the first entry that is not a regular file; this also covers the
  # literal pattern that remains when the directory is empty. The expansion is
  # quoted so that names containing whitespace are handled as one word.
  [ -f "$script" ] || break
  # A failing init script stops the loop but does not abort the entrypoint
  # (`set -e` does not apply to a command tested by `if`); the main command is
  # still started. Report the failure on stderr instead of hiding it.
  if ! /bin/sh "$script" ; then
    printf 'entrypoint: %s failed, skipping remaining init scripts\n' "$script" >&2
    break
  fi
done

exec "${@:-/bin/bash}"
|
@@ -7,10 +7,12 @@ RUN yum install -y epel-release && \
|
||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
# FIXME
|
||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
||||
RUN mkdir -p /usr/local/lib/entrypoints.d/
|
||||
|
||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
||||
COPY --chown=root:root entry-munge.sh /usr/local/lib/entrypoints.d/10-munge.sh
|
||||
COPY --chown=root:root entrypoint.sh /usr/local/sbin/entrypoint.sh
|
||||
|
||||
RUN chmod 755 /usr/local/lib/entrypoints.d/10-munge.sh && \
|
||||
chmod 755 /usr/local/sbin/entrypoint.sh
|
||||
|
||||
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
|
||||
|
@@ -23,6 +23,8 @@
|
||||
group: root
|
||||
mode: u=rwx,g=rx,o=rx
|
||||
loop: "{{ image.files | default([]) }}"
|
||||
loop_control:
|
||||
label: "{{ item.dest }}"
|
||||
register: slurm_cp_files
|
||||
|
||||
- docker_image:
|
||||
|
@@ -16,6 +16,7 @@
|
||||
- file: slurm-base.Dockerfile
|
||||
perms: u=rw,g=r,o=r
|
||||
- file: entrypoint.sh
|
||||
- file: entry-munge.sh
|
||||
- file: docker-init
|
||||
- file: start-scripts/10-munge
|
||||
when: not (slurm_baseimg_build_chg | default(False))
|
||||
|
Reference in New Issue
Block a user