Files
cobald-testenv/roles/slurm/tasks/main.yml

139 lines
3.8 KiB
YAML

# Make sure the Slurm packages (tools and documentation) are installed on the
# managed host via yum.
- name: install slurm tools on host
  yum:
    state: present
    name:
      - slurm
      - slurm-doc
# Include dockerimage.yml once per image definition in `loop`. The current
# entry is exposed as `image` (see loop_control) and its fields are handed to
# the included tasks as `image_name`, `dockerfile` and `files`.
- name: build docker images for slurm
  include_tasks:
    file: dockerimage.yml
  vars:
    # Every entry in `loop` below defines name, dockerfile and files, so the
    # defaults are not exercised by this list.
    # NOTE(review): `omit` is meant for module arguments; confirm it behaves
    # as intended when used inside `vars`.
    image_name: "{{image.name | default(omit) }}"
    dockerfile: "{{image.dockerfile | default(omit) }}"
    files: "{{image.files | default(omit) }}"
  loop_control:
    loop_var: image
    # Show only the image name in the per-item output.
    label: "{{ image.name }}"
  loop:
    # Image for the slurmctld container.
    - name: slurmctld
      dockerfile: "{{ lookup('file', 'slurmctld.Dockerfile') }}"
      files:
        - dest: start-scripts/20-slurmctld
          content: "{{ lookup('file', 'start-scripts/20-slurmctld') }}"
    # Image for the slurmd container.
    - name: slurmd
      dockerfile: "{{ lookup('file', 'slurmd.Dockerfile') }}"
      files:
        - dest: start-scripts/30-slurmd
          content: "{{ lookup('file', 'start-scripts/30-slurmd') }}"
# Create a 1024-byte random munge key, unless one already exists (`creates`
# makes the task a no-op when the file is present).
# The umask is tightened before the redirect so the secret is never written
# with group/other read permission, not even for the short window before a
# later task adjusts owner and mode, or if the play aborts in between.
- name: generate munge key
  shell:
    cmd: umask 077 && dd if=/dev/urandom bs=1 count=1024 >/etc/munge/munge.key
    creates: /etc/munge/munge.key
# Restrict the host's munge key to its owner: owned by munge:munge,
# read/write for the user and no access for group or others.
- name: set permissions on munge key
  file:
    path: /etc/munge/munge.key
    mode: "u=rw,g=,o="
    group: munge
    owner: munge
# Ensure the directory that holds the munge key for the containers exists,
# owned by munge:munge and accessible by the owner only.
- name: create munge key directory for containers
  file:
    state: directory
    path: /container/volumes/munge
    mode: "u=rwx,g=,o="
    group: munge
    owner: munge
# Duplicate the host's munge key into the container volume directory.
# Both paths are on the managed host (remote_src), the destination is always
# overwritten (force), and the source file's mode is carried over (preserve).
- name: copy munge key to docker containers
  copy:
    src: /etc/munge/munge.key
    dest: /container/volumes/munge/munge.key
    remote_src: true
    mode: preserve
    force: true
# Ensure the directory that receives the rendered Slurm configuration exists.
- name: make slurm directory
  file:
    state: directory
    path: /container/volumes/slurm/
# Ensure the docker network named by `docker_network` exists. The module
# result is registered as `slurm_network_data`; the "upload slurm config"
# task reads the network gateway address from it.
- name: create docker network to make service discovery work
  tags: slurm-config
  docker_network:
    state: present
    name: "{{ docker_network }}"
  register: slurm_network_data
# Render slurm.conf and cgroup.conf from their .j2 templates into the slurm
# volume directory, always overwriting existing files, and notify the
# "reconfigure slurm" handler on change.
# The task-level vars below are presumably consumed by the templates; the
# templates are not visible here, so verify names against them.
- name: upload slurm config
  tags: slurm-config
  notify: reconfigure slurm
  template:
    src: "{{item}}.j2"
    dest: "/container/volumes/slurm/{{item}}"
    force: true
  loop:
    - slurm.conf
    - cgroup.conf
  vars:
    slurm_exec_node_cores: 3
    slurm_exec_node_mem: 5000  # RealMemory=5964
    # Default node list: the "<prefix>-submit1" node plus the host itself,
    # the latter addressed via the gateway of the registered docker network
    # (`slurm_network_data` comes from the "create docker network" task).
    slurm_alloc_nodes_default:
      - name: "{{slurm_prefix+'-submit1'}}"
      - name: "{{ inventory_hostname }}"
        addr: "{{ slurm_network_data.network.IPAM.Config[0].Gateway }}"
    # Defaults plus optional `extra_nodes`; the filter binds tighter than
    # `+`, so only `extra_nodes` falls back to an empty list.
    alloc_nodes: "{{ slurm_alloc_nodes_default + extra_nodes | default([])}}"
    partitions:
      - name: cobald
        nodeprefix: drone
        num_nodes: 10
        node_cores: 3
        node_mem: 4900
        port: 16818
        initstate: FUTURE
# Ensure the named docker volume "slurm-shared" exists.
- name: create docker volume for shared access between nodes
  docker_volume:
    state: present
    name: slurm-shared
# Start one container per entry of `slurm_nodes_all` (exec nodes followed by
# standard nodes). Each container is named "<slurm_prefix>-<machine>", joins
# the `docker_network` network, and mounts the default mounts plus any
# per-node `extra_mounts`.
- name: run slurm docker containers
  tags: slurm-config
  vars:  # see vars/main.yml
    slurm_nodes_all: "{{ slurm_nodes_exec + slurm_nodes_std }}"
  loop: "{{ slurm_nodes_all }}"
  loop_control:
    # Show the container name instead of the whole item in the output.
    label: "{{slurm_prefix}}-{{ item.machine }}"
  docker_container:
    name: "{{ slurm_prefix }}-{{ item.machine }}"
    hostname: "{{ slurm_prefix }}-{{ item.machine }}"
    domainname: "{{ slurm_domain }}"
    image: "{{ item.image }}"
    state: started
    env:
      slurmuser: "{{ slurm_user }}"
    # Optional per-node settings fall back to empty lists / omitted aliases.
    volumes: "{{ slurm_default_mounts + ( item.extra_mounts | default([]) ) }}"
    ports: "{{ item.exposed_ports | default([]) }}"
    networks:
      - name: "{{ docker_network }}"
        aliases: "{{ item.aliases | default(omit) }}"
    networks_cli_compatible: true
    privileged: "{{ container_privileged | bool }}"
    interactive: true
    detach: true
    # NOTE(review): docker_container documents `cleanup` for use with
    # detach=false; confirm it has any effect in combination with detach.
    cleanup: true
# Optionally import host-config.yml. It is skipped unless
# `slurm_hostsystem_cluster_access` is set to a true value; an undefined
# variable counts as false.
# The `bool` filter normalizes string input (e.g. "false" or "no" passed via
# `-e key=value`), which would otherwise be truthy as a non-empty string and
# enable the import by accident.
- name: configure host system to integrate into slurm cluster
  import_tasks: host-config.yml
  when: slurm_hostsystem_cluster_access | default(False) | bool
# Publish a `slurm` fact that bundles the cluster's user, domain, base image
# name, default mounts and docker network. Tagged `always`, so it also runs
# when the play is limited to other tags.
- name: export facts about slurm cluster to be used by other modules
  tags: always
  set_fact:
    slurm:
      user: "{{slurm_user}}"
      domain: "{{slurm_domain}}"
      base_image: 'slurm:base'
      mounts: "{{slurm_default_mounts}}"
      network: "{{docker_network}}"