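---
# Tasks that install the Slurm tools on the host, build the Slurm Docker
# images, distribute the munge key, render the Slurm configuration and start
# the Slurm containers.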
# Install the Slurm packages (tools plus documentation) on the host via yum.
- name: install slurm tools on host
  yum:
    state: present
    name:
      - slurm
      - slurm-doc

# Include dockerimage.yml once per entry of the loop below. The current entry
# is exposed as `image` (see loop_control) and unpacked by the task-level vars
# into image_name / dockerfile / files for the included tasks; a field that is
# missing from an entry falls back to `omit`.
- name: build docker images for slurm
  include_tasks:
    file: dockerimage.yml
  vars:
    image_name: "{{image.name | default(omit) }}"
    dockerfile: "{{image.dockerfile | default(omit) }}"
    files: "{{image.files | default(omit) }}"
  loop_control:
    loop_var: image
    # Show only the image name in the per-item output.
    label: "{{ image.name }}"
  loop:
    # Image for the Slurm controller daemon.
    - name: slurmctld
      dockerfile: "{{ lookup('file', 'slurmctld.Dockerfile') }}"
      files:
        - dest: start-scripts/20-slurmctld
          content: "{{ lookup('file', 'start-scripts/20-slurmctld') }}"
    # Image for the Slurm node daemon.
    - name: slurmd
      dockerfile: "{{ lookup('file', 'slurmd.Dockerfile') }}"
      files:
        - dest: start-scripts/30-slurmd
          content: "{{ lookup('file', 'start-scripts/30-slurmd') }}"

# Create the munge key from 1024 random bytes. `creates` turns the task into
# a no-op once /etc/munge/munge.key exists, so an existing key is never
# overwritten.
- name: generate munge key
  shell:
    # umask 077: the redirection creates the file, so without it the key
    # would get the default (typically group/world-readable) permissions
    # until the following task sets owner and mode.
    cmd: umask 077 && dd if=/dev/urandom bs=1 count=1024 >/etc/munge/munge.key
    creates: /etc/munge/munge.key

# Hand the key to munge:munge and restrict it to owner read/write only.
- name: set permissions on munge key
  file:
    path: /etc/munge/munge.key
    mode: 'u=rw,g=,o='
    group: munge
    owner: munge

# Ensure /container/volumes/munge exists, owned by munge:munge and accessible
# to its owner only.
- name: create munge key directory for containers
  file:
    state: directory
    path: /container/volumes/munge
    mode: 'u=rwx,g=,o='
    group: munge
    owner: munge

# Copy the host's munge key into /container/volumes/munge.
- name: copy munge key to docker containers
  copy:
    # remote_src: src is a path on the managed host, not on the controller.
    remote_src: true
    src: /etc/munge/munge.key
    dest: /container/volumes/munge/munge.key
    # Keep the permission bits of the source file on the copy.
    mode: preserve
    # Replace the destination whenever its content differs from the source.
    force: true

# Ensure the directory that receives the rendered Slurm configuration exists.
- name: make slurm directory
  file:
    state: directory
    path: /container/volumes/slurm/

# Ensure the Docker network named by `docker_network` exists. The module
# result is kept in slurm_network_data; the "upload slurm config" task reads
# the network's gateway address from it.
- name: create docker network to make service discovery work
  docker_network:
    state: present
    name: "{{ docker_network }}"
  tags:
    - slurm-config
  register: slurm_network_data

# Render slurm.conf and cgroup.conf from their .j2 templates into
# /container/volumes/slurm/. The vars below are only set for this task and
# are presumably consumed by the templates (not visible here).
- name: upload slurm config
  tags:
    - slurm-config
  vars:
    slurm_exec_node_cores: 3
    slurm_exec_node_mem: 5000  # RealMemory=5964
    slurm_alloc_nodes_default:
      - name: "{{slurm_prefix+'-submit1'}}"
      # The Ansible host itself, addressed through the gateway of the Docker
      # network registered in slurm_network_data.
      - name: "{{ inventory_hostname }}"
        addr: "{{ slurm_network_data.network.IPAM.Config[0].Gateway }}"
    # Default nodes plus optional extra_nodes; the default([]) filter applies
    # to extra_nodes only, because filters bind tighter than `+`.
    alloc_nodes: "{{ slurm_alloc_nodes_default + extra_nodes | default([])}}"
    partitions:
      - name: cobald
        nodeprefix: drone
        num_nodes: 10
        node_cores: 3
        node_mem: 4900
        port: 16818
        initstate: FUTURE
  template:
    src: "{{item}}.j2"
    dest: "/container/volumes/slurm/{{item}}"
    force: true
  loop:
    - slurm.conf
    - cgroup.conf
  # Trigger the "reconfigure slurm" handler when a rendered file changed.
  notify: reconfigure slurm

# Statically import host-config.yml; its tasks run only when
# slurm_hostsystem_cluster_access is set and truthy (unset counts as False).
- import_tasks:
    file: host-config.yml
  when: slurm_hostsystem_cluster_access | default(False)

# Ensure the named Docker volume "slurm-shared" exists.
- name: create docker volume for shared access between nodes
  docker_volume:
    state: present
    name: slurm-shared

# TODO: reserve an address (looked up via docker_network_info) and assign it
# as an aux address, so that slurmctld gets a static address and is reachable
# from slurm running on the docker host; this is needed to submit jobs from
# the host.

# Start one container per entry of slurm_nodes_all (exec nodes followed by
# standard nodes). Each entry supplies `machine` and `image`, and may supply
# `extra_mounts` and `exposed_ports`.
- name: run slurm docker containers
  tags:
    - slurm-config
  vars:  # see vars/main.yml
    slurm_nodes_all: "{{ slurm_nodes_exec + slurm_nodes_std }}"
  loop: "{{ slurm_nodes_all }}"
  loop_control:
    label: "{{slurm_prefix}}-{{ item.machine }}"
  docker_container:
    # Identity: container name and hostname are both <prefix>-<machine>.
    name: "{{ slurm_prefix }}-{{ item.machine }}"
    hostname: "{{ slurm_prefix }}-{{ item.machine }}"
    domainname: "{{ slurm_domain }}"
    image: "{{ item.image }}"
    state: started
    # Storage and networking: default mounts plus per-node extras.
    volumes: "{{ slurm_default_mounts + ( item.extra_mounts | default([]) ) }}"
    ports: "{{ item.exposed_ports | default([]) }}"
    networks:
      - name: "{{ docker_network }}"
    networks_cli_compatible: true
    # Runtime environment handed to the container.
    env:
      slurmuser: "{{ slurm_user }}"
    privileged: "{{ container_privileged | bool }}"
    interactive: true
    detach: true
    # NOTE(review): the docker_container docs describe `cleanup` as meant for
    # detach=false; confirm it has any effect together with detach: true.
    cleanup: true

# Publish the cluster parameters as the host fact `slurm`. Tagged `always`
# so the fact is set even when the play is limited to other tags.
- name: export facts about slurm cluster to be used by other modules
  tags:
    - always
  set_fact:
    slurm:
      base_image: "slurm:base"
      user: "{{slurm_user}}"
      domain: "{{slurm_domain}}"
      network: "{{docker_network}}"
      mounts: "{{slurm_default_mounts}}"