---
# cobald-testenv/roles/slurm/tasks/main.yml
# Sets up a containerized Slurm test cluster: builds images, distributes
# the munge key and slurm config via bind-mounted volumes, and starts the
# controller, submit, and execute node containers on a dedicated network.
# Install the Slurm client tools and docs on the docker host itself so
# that jobs can be submitted from outside the containers.
- name: 'install slurm tools on host'
  yum:
    state: present
    name:
      - slurm
      - slurm-doc
# Build the slurmctld and slurmd container images via the
# slurm_dockerimage role, once per image definition in the loop.
# Fixes: added a task name (ansible-lint name[missing]); canonical
# lowercase boolean for build_base (yamllint truthy).
- name: build slurm docker images
  include_role:
    name: slurm_dockerimage
  loop:
    - name: slurmctld
      dockerfile: "{{ lookup('file', 'slurmctld.Dockerfile') }}"
      start_scripts:
        - name: 20-slurmctld
          content: "{{ lookup('file', 'start-scripts/20-slurmctld') }}"
    - name: slurmd
      dockerfile: "{{ lookup('file', 'slurmd.Dockerfile') }}"
      start_scripts:
        - name: 30-slurmd
          content: "{{ lookup('file', 'start-scripts/30-slurmd') }}"
  loop_control:
    loop_var: image
    label: "{{ image.name }}"
  vars:
    build_base: true  # presumably tells the role to (re)build the shared base image — confirm in slurm_dockerimage
# Create a 1 KiB random munge key; 'creates' makes the task idempotent,
# so an existing key is never overwritten.
- name: generate munge key
  shell:
    creates: /etc/munge/munge.key
    cmd: dd if=/dev/urandom bs=1 count=1024 >/etc/munge/munge.key
# Restrict the key to its owner (0600 equivalent); munged rejects keys
# that other users can read.
- name: set permissions on munge key
  file:
    path: /etc/munge/munge.key
    mode: u=rw,g=,o=
    owner: munge
    group: munge
# Directory that will be bind-mounted into the containers to carry the
# munge key. Fix: added a task name (ansible-lint name[missing]) to match
# the naming convention of the surrounding tasks.
- name: create munge volume directory
  file:
    path: /container/volumes/munge
    state: directory
    owner: munge
    group: munge
    mode: u=rwx,g=,o=  # owner-only access, like the key itself
# Publish the host's munge key into the volume directory shared with the
# containers. Fix: normalized 'remote_src: True' to lowercase 'true' so
# booleans are written consistently (yamllint truthy).
- name: copy munge key to docker containers
  copy:
    src: /etc/munge/munge.key
    dest: /container/volumes/munge/munge.key
    remote_src: true  # the source file lives on the managed host, not the controller
    force: true
    mode: preserve  # keep the restrictive owner-only permissions
# Directory that will be bind-mounted as /etc/slurm inside the containers.
# Fix: added a task name (ansible-lint name[missing]).
- name: create slurm config volume directory
  file:
    path: /container/volumes/slurm/
    state: directory
# Render slurm.conf and cgroup.conf from their Jinja templates into the
# shared volume; any change notifies the 'reconfigure slurm' handler.
- name: upload slurm config
  template:
    force: true
    src: "{{item}}.j2"
    dest: "/container/volumes/slurm/{{item}}"
  loop:
    - slurm.conf
    - cgroup.conf
  notify: reconfigure slurm
  tags: [slurm-config]
- name: "create docker network to make service discovery work"
docker_network:
name: slurm
state: present
- name: "create docker volume for shared access between nodes"
docker_volume:
name: slurm-shared
state: present
# Base node list; execute nodes are appended later in the container-run
# task's vars. Fix: added a task name (ansible-lint name[missing]).
- name: define default slurm nodes
  set_fact:
    slurm_nodes:  # default nodes: controller and submit machine
      - machine: ctl
        image: slurm:slurmctld
        exposed_ports: ["6817:6817/tcp"]  # slurmctld, reachable from the docker host
      - machine: submit1
        image: slurm:slurmd
        extra_mounts:
          - "/home/{{unpriv_user}}/job3/:/mnt/:rw"
      # - machine: slurm-cobald
      #   image: slurm-cobald
      #   extra_mounts:
      #   # TODO
  tags: [slurm-config]
# TODO: reserve some address using docker_network_info and assign as aux
# address to enable slurmctld to get a static address in order to be
# reachable from slurm running on docker host to enable submitting jobs.
# Start one container per entry in slurm_nodes_all. The vars section
# first extends slurm_nodes with num_nodes execute nodes (exec1..execN),
# then the loop iterates over the combined list.
# Fixes: canonical lowercase booleans for detach/cleanup/
# networks_cli_compatible/interactive (yamllint truthy); consistent
# Jinja expression spacing.
- name: run slurm docker containers
  docker_container:
    name: "slurm-{{ item.machine }}"
    hostname: "slurm-{{ item.machine }}"
    domainname: "slurm.local"
    # every node gets the config, munge key and shared volume; per-node
    # extra_mounts are appended when present
    volumes: "{{ default_mounts + (item.extra_mounts | default([])) }}"
    ports: "{{ item.exposed_ports | default([]) }}"
    networks:
      - name: "slurm"
    env:
      slurmuser: "{{ slurm_user }}"
    image: "{{ item.image }}"
    state: started
    detach: true
    cleanup: true  # remove the container after it stops
    privileged: "{{ container_privileged | bool }}"
    networks_cli_compatible: true
    interactive: true
  vars:
    default_mounts:
      - /container/volumes/slurm/:/etc/slurm/:rw
      - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
      - slurm-shared:/shared/:rw
    slurm_nodes_all: | # add execute nodes
      {% for i in range(1, num_nodes+1) -%}
      {% set _ = slurm_nodes.extend([
      {'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
      {%- endfor %}
      {{ slurm_nodes }}
  loop: "{{ slurm_nodes_all }}"
  tags: [slurm-config]