# Notice: running slurmd on the execute nodes as a non-root user also makes
# no sense because it breaks sbatch. Furthermore there is another issue that
# prevents running MPI jobs (just tried MpiDefault=none). I don't consider
# running slurmd as root a good idea, but there seems to be no other choice
# at the moment.
---
# Install the slurm client tools on the docker host itself so jobs can be
# submitted from outside the containers.
- name: 'install slurm tools on host'
  yum:
    name: [ slurm, slurm-doc ]
    state: present
# Build one docker image per slurm daemon role (loop item is passed to the
# included task file as "item").
- name: build slurm docker images
  include_tasks: dockerimage.yml
  loop:
    - slurmctld
    - slurmd
# Create a 1024-byte random munge key. "creates" makes the task idempotent:
# it is skipped once the key file exists.
- name: generate munge key
  shell:
    cmd: dd if=/dev/urandom bs=1 count=1024 >/etc/munge/munge.key
    creates: /etc/munge/munge.key
# munged refuses to start unless the key is owned by the munge user and is
# unreadable by group/others.
- name: set permissions on munge key
  file:
    path: /etc/munge/munge.key
    owner: munge
    group: munge
    mode: 'u=rw,g=,o='
# Directory that is bind-mounted into the containers to share the munge key.
- name: create munge volume directory
  file:
    path: /container/volumes/munge
    state: directory
    owner: munge
    group: munge
    mode: 'u=rwx,g=,o='
# Copy the host's munge key into the shared volume directory; "mode: preserve"
# keeps the restrictive permissions set above.
- name: copy munge key to docker containers
  copy:
    remote_src: true
    force: true
    mode: preserve
    src: /etc/munge/munge.key
    dest: /container/volumes/munge/munge.key
# Directory that is bind-mounted into the containers as /etc/slurm/.
- name: create slurm config volume directory
  file:
    path: /container/volumes/slurm/
    state: directory
# Render the slurm configuration templates into the shared config volume.
# Changing either file triggers the "reconfigure slurm" handler.
- name: upload slurm config
  template:
    force: true
    src: "{{ item }}.j2"
    dest: "/container/volumes/slurm/{{ item }}"
  loop:
    - slurm.conf
    - cgroup.conf
  notify: reconfigure slurm
  tags: [ slurm-config ]
- name: "create docker network to make service discovery work"
|
|
docker_network:
|
|
name: slurm
|
|
state: present
|
|
|
|
- name: "create docker volume for shared access between nodes"
|
|
docker_volume:
|
|
name: slurm-shared
|
|
state: present
|
|
|
|
# Base node list: the controller and one submit machine. Execute nodes are
# appended later (see the slurm_nodes_all var of the container task).
- name: define default slurm nodes
  set_fact:
    slurm_nodes:  # default nodes: controller and submit machine
      - machine: ctl
        image: slurm-slurmctld
      - machine: submit1
        image: slurm-slurmd
        extra_mounts:
          - "/home/{{ unpriv_user }}/job3/:/mnt/:rw"
  tags: [ slurm-config ]
# TODO: reserve some address using docker_network_info and assign as aux
# address to enable slurmctld to get a static address in order to be
# reachable from slurm running on docker host to enable submitting jobs.

# Start one container per entry in slurm_nodes_all. Every container gets the
# shared config/munge/scratch mounts plus any per-node extra_mounts.
- name: run slurm docker containers
  docker_container:
    name: "slurm-{{ item.machine }}"
    hostname: "slurm-{{ item.machine }}"
    domainname: "slurm.local"
    # default mounts first so a node-specific mount could shadow them
    volumes: "{{ default_mounts + (item.extra_mounts | default([])) }}"
    networks:
      - name: slurm
    env:
      slurmuser: "{{ slurm_user }}"
    image: "{{ item.image }}"
    state: started
    detach: true
    cleanup: true
    # NOTE(review): privileged is needed while slurmd must run as root — see
    # the notice at the top of this file.
    privileged: "{{ container_privileged | bool }}"
    networks_cli_compatible: true
  vars:
    default_mounts:
      - /container/volumes/slurm/:/etc/slurm/:rw
      - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
      - slurm-shared:/shared/:rw
    # Extend the base slurm_nodes fact with exec1..exec3 execute nodes; the
    # final "{{ slurm_nodes }}" expression is what the loop consumes.
    slurm_nodes_all: | # add execute nodes
      {% for i in range(1, 4) -%}
      {% set _ = slurm_nodes.extend([
        {'machine': 'exec%s'|format(i), 'image': 'slurm-slurmd'}]) -%}
      {%- endfor %}
      {{ slurm_nodes }}
  loop: "{{ slurm_nodes_all }}"
  tags: [ slurm-config ]