diff --git a/play.yml b/play.yml
index 452eaec..5555b03 100644
--- a/play.yml
+++ b/play.yml
@@ -39,4 +39,5 @@
         name: slurm
       vars:
         container_privileged: True
+        num_nodes: 3  # count of slurm-exec<N> nodes; rendered into slurm.conf.j2
       tags: slurm
diff --git a/roles/slurm/handlers/main.yml b/roles/slurm/handlers/main.yml
new file mode 100644
index 0000000..b82ee7c
--- /dev/null
+++ b/roles/slurm/handlers/main.yml
@@ -0,0 +1,10 @@
+---
+# Handlers for the slurm role.
+
+# Run `scontrol reconfigure` inside the slurm-ctl container. Notified by the
+# task in tasks/main.yml that loops over slurm.conf and cgroup.conf.
+# No -i/-t flags: Ansible's command module has no TTY attached, and
+# `docker exec -t` aborts with "the input device is not a TTY".
+- name: reconfigure slurm
+  command:
+    cmd: docker container exec slurm-ctl scontrol reconfigure
diff --git a/roles/slurm/tasks/main.yml b/roles/slurm/tasks/main.yml
index 90c4faf..5732bd1 100644
--- a/roles/slurm/tasks/main.yml
+++ b/roles/slurm/tasks/main.yml
@@ -47,6 +47,7 @@
   loop:
     - slurm.conf
     - cgroup.conf
+  notify: reconfigure slurm  # handler defined in roles/slurm/handlers/main.yml
   tags: [ slurm-config ]
 
 - name: "create docker network to make service discovery work"
@@ -54,6 +55,16 @@
     name: slurm
     state: present
 
+- set_fact:
+    slurm_nodes:  # default nodes: controller and submit machine
+      - machine: ctl
+        image: slurm-slurmctld
+      - machine: submit1
+        image: slurm-slurmd
+        extra_mounts:
+          - "/home/thoto/job3/:/mnt/:rw"  # NOTE(review): host-specific path
+  tags: [ slurm-config ]
+
 # TODO: reserve some address using docker_network_info and assign as aux
 # address to enable slurmctld to get a static address in order to be
 # reachable from slurm running on docker host to enable submitting jobs.
@@ -70,21 +81,18 @@
     state: started
     detach: True
     cleanup: True
-    privileged: "{{ container_privileged | ternary(True,False) }}"
+    privileged: "{{ container_privileged | bool }}"
     networks_cli_compatible: True
   vars:
     default_mounts:
       - /container/volumes/slurm/:/etc/slurm/:rw
       - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
-  loop:
-    - machine: ctl
-      image: slurm-slurmctld
-    - machine: exec1
-      image: slurm-slurmd
-    - machine: exec2
-      image: slurm-slurmd
-    - machine: submit1
-      image: slurm-slurmd
-      extra_mounts:
-        - "/home/thoto/job3/:/mnt/:rw"
+    slurm_nodes_all: |  # copy of slurm_nodes plus exec1..exec<num_nodes>
+      {% set nodes = slurm_nodes | list -%}
+      {% for i in range(1, (num_nodes | int) + 1) -%}
+      {% set _ = nodes.append(
+        {'machine': 'exec%s' | format(i), 'image': 'slurm-slurmd'}) -%}
+      {%- endfor %}
+      {{ nodes }}
+  loop: "{{ slurm_nodes_all }}"  # one container per entry
   tags: [ slurm-config ]
diff --git a/roles/slurm/templates/slurm.conf.j2 b/roles/slurm/templates/slurm.conf.j2
index 1441dda..6b4c4f5 100644
--- a/roles/slurm/templates/slurm.conf.j2
+++ b/roles/slurm/templates/slurm.conf.j2
@@ -161,7 +161,6 @@ SlurmdDebug=verbose
 #
 #
 # COMPUTE NODES
-NodeName=slurm-exec1 CPUs=1 State=UNKNOWN
-NodeName=slurm-exec2 CPUs=1 State=UNKNOWN
+NodeName=slurm-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN
 NodeName=slurm-submit1 CPUs=1 State=UNKNOWN
-PartitionName=debug Nodes=slurm-exec1,slurm-exec2 Default=YES MaxTime=INFINITE State=UP
+PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] Default=YES MaxTime=INFINITE State=UP