WIP: COBalD container containing and using Slurm
@@ -5,7 +5,7 @@
 - include_role:
     name: slurm_dockerimage
-  loop:
+  loop: # FIXME: default(omit)!
     - name: slurmctld
       dockerfile: "{{ lookup('file', 'slurmctld.Dockerfile') }}"
       files:
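
The FIXME refers to Ansible's `omit` placeholder: `some_var | default(omit)` drops a module parameter entirely when the variable is undefined, rather than passing an empty value. A minimal sketch of the idiom using the loop fields from this playbook (the task below is illustrative, not part of the commit):

    - docker_container:
        name: "{{ item.machine }}"
        # parameter is omitted entirely when the loop item has no exposed_ports
        ports: "{{ item.exposed_ports | default(omit) }}"
      loop: "{{ slurm_nodes_all }}"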
@@ -59,12 +59,14 @@
   loop:
     - slurm.conf
     - cgroup.conf
+  vars:
+    alloc_nodes: "{{ [ slurm_prefix+'-submit1' ] + extra_nodes | default([])}}"
   notify: reconfigure slurm
   tags: [ slurm-config ]
 
 - name: "create docker network to make service discovery work"
   docker_network:
-    name: slurm
+    name: "{{ docker_network }}"
     state: present
 
 - name: "create docker volume for shared access between nodes"
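
The `notify: reconfigure slurm` in the hunk above requires a handler of that name elsewhere in the role; it is not shown in this diff. A plausible sketch, assuming the controller container is named `{{ slurm_prefix }}-ctl` as in the container task below (`scontrol reconfigure` is Slurm's standard way to reread slurm.conf):

    # handlers/main.yml — assumed, not shown in the commit
    - name: reconfigure slurm
      command: docker exec {{ slurm_prefix }}-ctl scontrol reconfigure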
@@ -73,7 +75,7 @@
     state: present
 
 - set_fact:
-    slurm_nodes: # default nodes: controller and submit machine
+    slurm_nodes_std: # default nodes: controller and submit machine
       - machine: ctl
         image: slurm:slurmctld
         exposed_ports: [ "6817:6817/tcp" ]
@@ -81,10 +83,18 @@
         image: slurm:slurmd
         extra_mounts:
           - "/home/{{unpriv_user}}/job3/:/mnt/:rw"
-#      - machine: slurm-cobald
-#        image: slurm-cobald
-#        extra_mounts:
-#          # TODO
+    slurm_nodes_exec: | # extend range to execute nodes list
+      {% set slurm_nodes_exec = slurm_nodes_exec | default([]) %}
+      {% for i in range(1, num_nodes+1) -%}
+      {% set _ = slurm_nodes_exec.extend([
+        {'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
+      {%- endfor %}
+      {{ slurm_nodes_exec }}
+    slurm_default_mounts:
+      - /container/volumes/slurm/:/etc/slurm/:rw
+      - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
+      - slurm-shared:/shared/:rw
+    slurm_network: "{{docker_network}}"
   tags: [ slurm-config ]
 
 # TODO: reserve some address using docker_network_info and assign as aux
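
The `slurm_nodes_exec` entry above uses a YAML block scalar to build a list: the Jinja `for` loop extends a list variable, the final `{{ slurm_nodes_exec }}` prints its representation, and Ansible evaluates that string back into a list when the fact is used. For reference, with `num_nodes: 2` it renders to:

    slurm_nodes_exec:
      - { machine: exec1, image: "slurm:slurmd" }
      - { machine: exec2, image: "slurm:slurmd" }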
@@ -93,16 +103,16 @@
 
 - name: run slurm docker containers
   docker_container:
-    name: "slurm-{{item.machine}}"
-    hostname: "slurm-{{item.machine}}"
-    domainname: "slurm.local"
-    volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
+    name: "{{ slurm_prefix }}-{{ item.machine }}"
+    hostname: "{{ slurm_prefix }}-{{ item.machine }}"
+    domainname: "{{ slurm_domain }}"
+    volumes: "{{ slurm_default_mounts + ( item.extra_mounts | default([]) ) }}"
     ports: "{{ item.exposed_ports | default([]) }}"
     networks:
-      - name: "slurm"
+      - name: "{{ slurm_network }}"
     env:
-      slurmuser: "{{slurm_user}}"
-    image: "{{item.image}}"
+      slurmuser: "{{ slurm_user }}"
+    image: "{{ item.image }}"
     state: started
     detach: True
     cleanup: True
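
Several variables introduced here (`slurm_prefix`, `slurm_domain`, `slurm_network`, `docker_network`, `slurm_user`, `num_nodes`, `extra_nodes`, `unpriv_user`) are defined outside this diff. A defaults sketch consistent with the values the removed lines hard-coded (every value below is an assumption, not taken from the commit):

    # defaults/main.yml — illustrative values only
    slurm_prefix: slurm
    slurm_domain: slurm.local
    docker_network: slurm
    slurm_user: slurm
    num_nodes: 3
    extra_nodes: []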
@@ -110,15 +120,17 @@
     networks_cli_compatible: True
     interactive: True
   vars:
-    default_mounts:
-      - /container/volumes/slurm/:/etc/slurm/:rw
-      - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
-      - slurm-shared:/shared/:rw
-    slurm_nodes_all: | # add execute nodes
-      {% for i in range(1, num_nodes+1) -%}
-      {% set _ = slurm_nodes.extend([
-        {'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
-      {%- endfor %}
-      {{ slurm_nodes }}
-  loop: "{{slurm_nodes_all}}"
+    slurm_nodes_all: "{{ slurm_nodes_exec + slurm_nodes_std }}"
+  loop: "{{ slurm_nodes_all }}"
+  loop_control:
+    label: "{{slurm_prefix}}-{{ item.machine }}"
   tags: [ slurm-config ]
+
+- name: set facts to be used by other modules
+  set_fact:
+    slurm:
+      user: "{{slurm_user}}"
+      domain: "{{slurm_domain}}"
+      base_image: "slurm:base"
+      mounts: "{{slurm_default_mounts}}"
+      network: "{{docker_network}}"
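
The final task exports a single consolidated `slurm` fact so later roles (e.g. the COBalD container this WIP targets) need not know the individual variables. A hypothetical consumer task (not part of the commit):

    # example task in a later role reading the exported fact
    - name: show the shared slurm settings
      debug:
        msg: "slurm user {{ slurm.user }} on network {{ slurm.network }}, mounts: {{ slurm.mounts }}"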