slurm startup

This commit is contained in:
2021-04-26 17:20:54 +02:00
parent 53502213bc
commit 4586fa7092
3 changed files with 56 additions and 1 deletions

View File

@@ -16,4 +16,5 @@ RUN chown root:root /usr/local/sbin/entrypoint.sh && chmod 755 /usr/local/sbin/e
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
# Commented-out HTCondor variant of the command, kept for reference:
# CMD bash -c 'cat <({ condor_master -f & tail --retry --pid $! -f /var/log/condor/MasterLog & })'
# Start munged (as user "munge", -F keeps it in the foreground) and slurmctld
# (-D keeps it in the foreground) as background jobs inside a bash process
# substitution. `cat` reads their combined stdout, so the command presumably
# keeps running for as long as either daemon holds the pipe open.
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & slurmctld -D & })'
# Fragment of the HTCondor command above (log tailing); there is no
# equivalent log tail in the slurm command yet.
# ... & tail --retry --pid $! -f /var/log/condor/MasterLog & })'

View File

@@ -14,3 +14,4 @@ RUN chown root:root /usr/local/sbin/entrypoint.sh && chmod 755 /usr/local/sbin/e
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
# Commented-out HTCondor variant of the command, kept for reference:
# CMD bash -c 'cat <({ condor_master -f & tail --retry --pid $! -f /var/log/condor/MasterLog & })'
# Start munged (as user "munge", -F keeps it in the foreground) and slurmd
# (-D keeps it in the foreground) as background jobs inside a bash process
# substitution. `cat` reads their combined stdout, so the command presumably
# keeps running for as long as either daemon holds the pipe open.
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & slurmd -D & })'

View File

@@ -34,3 +34,56 @@
mode: preserve
src: /etc/munge/munge.key
dest: /container/volumes/munge/munge.key
# Make sure the host directory that holds the slurm configuration exists
# before files are copied into it.
- name: create slurm config volume directory
  file:
    path: /container/volumes/slurm/
    state: directory
# Copy each listed configuration file into /container/volumes/slurm/,
# overwriting an existing copy on the host (force).
- name: copy slurm config
  tags:
    - slurm-config
  loop:
    - slurm.conf
    - cgroup.conf
  copy:
    src: "{{item}}"
    dest: "/container/volumes/slurm/{{item}}"
    force: true
# Dedicated docker network named "slurm"; the task name states its purpose:
# making service discovery between the containers work.
- name: create docker network to make service discovery work
  docker_network:
    state: present
    name: slurm
  # TODO: use docker_network_info to reserve an address and assign it as an
  # aux address, so that slurmctld gets a static address. That would make it
  # reachable from slurm running on the docker host and allow submitting
  # jobs from there.
# Start one container per loop entry, attached to the "slurm" docker network.
# Container name and hostname are both "slurm-<machine>". Every container
# receives default_mounts plus the entry's optional extra_mounts.
- name: run slurm docker containers
  vars:
    # Mounts shared by all containers: slurm configuration directory and the
    # munge key.
    default_mounts:
      - /container/volumes/slurm/:/etc/slurm/:rw
      - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
  docker_container:
    name: "slurm-{{item.machine}}"
    hostname: "slurm-{{item.machine}}"
    domainname: slurm.local
    image: "{{item.image}}"
    state: started
    detach: true
    # NOTE(review): the module documents cleanup for use with detach=false;
    # confirm it has the intended effect together with detach: true.
    cleanup: true
    networks_cli_compatible: true
    networks:
      - name: slurm
    volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
  loop:
    - machine: ctl
      image: slurm-slurmctld
    - machine: exec1
      image: slurm-slurmd
    - machine: exec2
      image: slurm-slurmd
    - machine: submit1
      image: slurm-slurmd
      # NOTE(review): user-specific host path; presumably a job directory
      # for testing - confirm before reuse on another host.
      extra_mounts:
        - "/home/thoto/job3/:/mnt/:rw"
  tags:
    - slurm-config