Compare commits

cd7dea8fda ... cobaldtard

12 Commits:

4c63f2a825
51390bb321
52022a3013
574d2fcb4e
2919c98d5f
f73fef1473
8bc2f717e0
d88761ca7d
3be5025442
4c4c4da79d
1a952a4e7a
74a760cf98
base.yml (8 changes)

@@ -11,6 +11,14 @@
    name: [ vim-enhanced, htop, screen, bind-utils, nmap-ncat, net-tools ]
    state: present

- name: "screenrc native scrolling in tmux"
  copy:
    content: "termcapinfo xterm* ti@:te@\ntermcapinfo screen* ti@:te@\n"
    dest: "{{item}}"
  with_items:
    - "~{{unpriv_user}}/.screenrc"
    - "~root/.screenrc"

- name: "install ssh-key"
  authorized_key:
    user: "{{cfg_unpriv_user}}"
@@ -43,6 +43,7 @@
    # containers runtime name (not hostname supplied!) and netname is
    # the network name in host environment. We should run our own dns...
    docker_network: slurm
    slurm_hostsystem_cluster_access: True
  when: '"slurm" in group_names'
  tags: slurm, cobald, influxdb, slurm-config
  # tags: cobald requires some slurm facts, so cobald tag is included here

@@ -57,4 +58,4 @@
    container_name: cobald
    # docker_network: slurm # overriden by vars/slurm.yml
  when: '"cobald" in group_names'
  tags: cobald, influxdb
  tags: cobald, influxdb, singularity
@@ -20,7 +20,7 @@
  tags: htcondor

- name: "setup singularity"
  import_tasks: "singularity.yml"
  import_role: name="singularity"
  tags: singularity

- name: "setup docker"
roles/cobald/files/28-sync-container-slurmd (new file, +3)

@@ -0,0 +1,3 @@
#!/bin/sh
[ /slurm-singimage/slurmd.sif -nt /shared/slurmd.sif ] && \
    cp /slurm-singimage/slurmd.sif /shared/slurmd.sif
roles/cobald/files/31-slurmd-configless (new file, +3)

@@ -0,0 +1,3 @@
#!/bin/sh
slurmd --conf-server ${slurmctld} -D -N ${nodename} 2>/dev/null 1>/dev/null &
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
roles/cobald/files/cgroup.conf.noautomount (new file, +11)

@@ -0,0 +1,11 @@
###
#
# Slurm cgroup support configuration file
#
# See man slurm.conf and man cgroup.conf for further
# information on cgroup configuration parameters
#--
CgroupAutomount=no

ConstrainCores=no
ConstrainRAMSpace=no
roles/cobald/files/cobald-config/config.yaml (new file, +31)

@@ -0,0 +1,31 @@
---
pipeline:
  - __type__: cobald.controller.linear.LinearController
    low_utilisation: 0.9
    high_allocation: 0.9
    rate: 0.10
  - !Limiter
    minimum: 3
  - !TelegrafPipelineMonitor
    poll: True
  - !TardisPoolFactory
    configuration: /etc/cobald/tardis.yaml
logging:
  version: 1
  root:
    level: DEBUG
    handlers: [console, file]
  handlers:
    console:
      class: logging.StreamHandler
      formatter: test
      level: DEBUG
      stream: ext://sys.stderr
    file:
      class: logging.handlers.RotatingFileHandler
      formatter: test
      level: WARNING
      filename: /var/log/cobald/cobald-tardis.log
  formatters:
    test:
      format: " %(name)s %(message)s"
roles/cobald/files/cobald-config/tardis.yaml (new file, +41)

@@ -0,0 +1,41 @@
Plugins:
  SqliteRegistry:
    db_file: /tmp/drone_registry.db
  TelegrafMonitoring:
    host: ed-telegraf
    port: 8094
#BatchSystem:
# adapter: FakeBatchSystem
# allocation: 1.0
# utilisation: !PeriodicValue
# period: 60
# amplitude: 0.15
# offset: 0.80
## phase: 1.
# phase: 1.6
# machine_status: Available
BatchSystem:
  adapter: Slurm
  max_age: 0.1
  options:
    partition: cobald
Sites:
  - name: slurmtest
    adapter: Slurm
    quota: 20
slurmtest:
  # executor: ...
  StatusUpdate: 0.1
  MachineTypes:
    - m1.a
  MachineTypeConfiguration:
    m1.a:
      Walltime: 5
      Partition: container
      StartupCommand: /usr/local/bin/start-drone
      # SubmitOptions: ...
  MachineMetaData:
    m1.a:
      Cores: 3 # cores
      Memory: 1 # GB
      Disk: 4 # not passed
roles/cobald/files/slurm-slurmd.def (new file, +31)

@@ -0,0 +1,31 @@
Bootstrap: docker-daemon
From: slurm:slurmd

%files
    31-slurmd-configless /etc/docker-init.d/31-slurm-configless
    /container/volumes/munge/munge.key /etc/munge/munge.key
    cgroup.conf.noautomount /etc/slurm/cgroup.conf

%post
    rm /etc/docker-init.d/30-slurmd
    chmod 755 /etc/docker-init.d/31-slurm-configless

%startscript
    if [ -z "${1}" -o -z "${2}" ] ; then
        echo "undefined variables slurmctld or nodename"
        exit 1
    fi
    export slurmctld="${1}"
    export nodename="${2}"
    echo ${slurmctld} ${nodename} ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
    exec /usr/local/sbin/entrypoint.sh /usr/local/sbin/docker-init

%runscript
    if [ -z "${1}" -o -z "${2}" ] ; then
        echo "undefined variables slurmctld or nodename"
        exit 1
    fi
    export slurmctld="${1}"
    export nodename="${2}"
    echo ${slurmctld} ${nodename} ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
    exec /usr/local/sbin/entrypoint.sh /usr/local/sbin/docker-init
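Both %startscript and %runscript expect the slurmctld host and the drone's node name as the two positional arguments and export them for the configless slurmd hook above. A minimal sketch of starting an instance from the built image, mirroring the invocation that start-drone (added later in this changeset) uses; the node name drone1 is only an illustrative value:

    singularity instance start --writable-tmpfs /shared/slurmd.sif slurm-drone \
        slurm-ctl drone1    # ${1} = slurmctld host, ${2} = node name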
roles/cobald/files/start-drone (new file, +59)

@@ -0,0 +1,59 @@
#!/bin/sh
#SBATCH -D /shared
export
echo $@
nodename=$(hostname | awk '{ print "drone" substr($1,match($1, "([[:digit:]]+)")) }')

SHUTDOWN_DONE=0

function handler_quit(){
    [ $SHUTDOWN_DONE -ne 0 ] && return
    set -x
    echo "drain container"
    scontrol update NodeName=${nodename} State=DRAIN Reason="cobald node quit"
    shutdown_jobs=$(squeue -w ${nodename} --noheader -O jobid)
    [ -n "${shutdown_jobs}" ] && scancel ${shutdown_jobs}
    #scancel -w ${nodename}
    i=$(( $(scontrol show config | grep KillWait | \
        sed 's/^KillWait.*= \([0-9]*\) sec/\1/') - 2 ))
    while [ -n "$(squeue -w ${nodename} --noheader -O jobid)" -o ${i} -lt 1 ]
    do
        i=$(( ${i} - 1 ))
        sleep 1
    done
    scancel -s KILL -w ${nodename} # hard kill all remaining jobs
    echo "shutdown container"
    scontrol update NodeName=${nodename} State=DOWN Reason=shutdown
    singularity instance stop slurm-drone
    scontrol update NodeName=${nodename} State=FUTURE
    umount /inner-cgroup/freezer
    umount /inner-cgroup
    SHUTDOWN_DONE=1
    exit 0
}

# set -x

trap handler_quit EXIT

echo "mounting cgroups"
mkdir /inner-cgroup
mount -t tmpfs none /inner-cgroup
mkdir /inner-cgroup/freezer/
mount --bind /sys/fs/cgroup/freezer/slurm/ /inner-cgroup/freezer/
mount -o remount,ro /inner-cgroup

echo "starting ${nodename}"
scontrol update NodeName=${nodename} State=RESUME # revoke last DRAIN
scontrol update NodeName=${nodename} State=FUTURE
singularity instance start \
    -B /inner-cgroup/:/sys/fs/cgroup/ \
    --writable-tmpfs /shared/slurmd.sif slurm-drone \
    slurm-ctl ${nodename}
# scontrol update NodeName=${nodename} NodeHostname=${SLURM_JOB_ID}
scontrol update NodeName=${nodename} NodeHostname=${TardisDroneUuid}
if [ $? -eq 0 ] ; then
    echo "container started, sleeping $(( 60 * ${SLURM_Walltime} - 2 ))"
    sleep $(( 60 * ${SLURM_Walltime} - 2 ))
fi
handler_quit
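Two timing details above are easy to miss: the drain budget is derived from the cluster's KillWait, and the sleep from the walltime exported into the job environment. A worked sketch of the arithmetic, assuming SLURM_Walltime carries the Walltime: 5 (minutes) configured in tardis.yaml and assuming Slurm's default KillWait of 30 seconds:

    i=$(( 30 - 2 ))           # up to 28 one-second polls while jobs drain
    sleep $(( 60 * 5 - 2 ))   # drone stays up for 298 seconds, then handler_quit runs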
@@ -10,11 +10,16 @@
    slurm_image_prefix: cobald
    image_name: "{{cobald_image_tag}}"
    dockerfile: "{{ lookup('template', 'cobald.Dockerfile') }}"
    files:
      - dest: cobald-entrypoint.sh
        content: "{{ lookup('file', 'cobald-entrypoint.sh') }}"
      - dest: init-cobaldmodules.sh
        content: "{{ lookup('file', 'init-cobaldmodules.sh') }}"
    files_list:
      - cobald-entrypoint.sh
      - init-cobaldmodules.sh
      - start-drone
      - 28-sync-container-slurmd
    files: "
      {%- set files = [] -%} {%- for i in files_list -%}
      {%- set files = files.append(
          { 'dest': i, 'content': lookup('file', i) }) -%}
      {%- endfor %}{{ files }}"
  when: cobald_slurm | default(False)

- name: build generic cobald docker image
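The files_list/files pair above replaces the hand-maintained dest/content list with a Jinja loop; rendered, it should evaluate to the same shape, e.g. (file contents abbreviated):

    files:
      - { dest: cobald-entrypoint.sh, content: "..." }
      - { dest: init-cobaldmodules.sh, content: "..." }
      - { dest: start-drone, content: "..." }
      - { dest: 28-sync-container-slurmd, content: "..." }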
@@ -28,13 +33,14 @@
    owner: "{{unpriv_user}}"
    group: docker

- name: copy cobald config (does nothing yet)
- name: copy cobald config
  copy:
    src: cobald-config/
    dest: /container/volumes/cobald
    dest: "~{{unpriv_user}}/cobald/"
    force: False
    owner: "{{unpriv_user}}"
    group: docker
  when: False
    mode: "0644"

- name: ensure network for cobald container exists
  docker_network:
@@ -64,7 +70,7 @@
    repo: https://github.com/thoto/cobald
    dest: "~{{unpriv_user}}/cobald-src"
    version: bugfix/mixed_construction_methods
    update: no
    update: no # FIXME
  become: yes
  become_user: "{{unpriv_user}}"
  register: cobald_git_pull
@@ -74,6 +80,7 @@
    repo: https://github.com/MatterMiners/tardis
    dest: "~{{unpriv_user}}/tardis-src"
    version: master
    update: no # FIXME
  become: yes
  become_user: "{{unpriv_user}}"
  register: tardis_git_pull
@@ -109,6 +116,13 @@
  include_tasks: facts.yml
  when: cobald_container_hostname is not defined

- name: build singularity container
  include_tasks:
    file: singularity.yml
    apply:
      tags: singularity
  tags: singularity

- name: run cobald container
  docker_container:
    name: "{{ container_name | default('cobald') }}"
@@ -129,6 +143,7 @@
    cobald_mounts:
      - "~{{unpriv_user}}/cobald:/etc/cobald"
      # - /container/volumes/cobald:/etc/cobald:ro
      - "/container/docker-images/sing-slurmd/build/:/slurm-singimage/:ro"
      - "~{{unpriv_user}}/cobald/modules:/usr/local/src/cobaldmodules"
      - "~{{unpriv_user}}/cobald-src:/usr/local/src/cobald:ro"
      - "~{{unpriv_user}}/tardis-src:/usr/local/src/tardis:ro"
roles/cobald/tasks/singularity.yml (new file, +48)

@@ -0,0 +1,48 @@
- name: setup singularity
  import_role: name="singularity"
  tags: singularity

- name: make singularity image build directory
  file:
    state: directory
    path: "{{item}}"
    owner: "{{unpriv_user}}"
    group: "docker"
    mode: "0755"
  loop:
    - /container/docker-images/sing-slurmd
    - /container/docker-images/sing-slurmd/cache
    - /container/docker-images/sing-slurmd/build

- name: copy slurm singularity container files
  copy:
    src: "{{item}}"
    dest: "/container/docker-images/sing-slurmd/{{item}}"
    owner: "{{unpriv_user}}"
    group: "docker"
  loop:
    - slurm-slurmd.def
    - 31-slurmd-configless
    - cgroup.conf.noautomount
  register: cobald_copy_sing_files

- name: remove old container
  file:
    path: /container/docker-images/sing-slurmd/build/slurmd.sif
    state: absent
  when: cobald_copy_sing_files.changed

- name: build container
  shell:
    chdir: /container/docker-images/sing-slurmd/
    cmd: SINGULARITY_TMPDIR=/container/docker-images/sing-slurmd/cache
      singularity build --disable-cache
      /container/docker-images/sing-slurmd/build/slurmd.sif
      /container/docker-images/sing-slurmd/slurm-slurmd.def
    creates: /container/docker-images/sing-slurmd/build/slurmd.sif
  register: cobald_sing_build

- debug: msg="{{[cobald_sing_build.stdout, cobald_sing_build.stderr]}}"
  tags: [ never, debug ]

# TODO: trigger copy in cobald container when slurmd.sif rebuilt
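The folded cmd in the build task above amounts to a single shell invocation on the docker host, roughly the following; the creates: argument skips it once build/slurmd.sif exists:

    cd /container/docker-images/sing-slurmd/
    SINGULARITY_TMPDIR=/container/docker-images/sing-slurmd/cache \
        singularity build --disable-cache \
        /container/docker-images/sing-slurmd/build/slurmd.sif \
        /container/docker-images/sing-slurmd/slurm-slurmd.def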
@@ -48,6 +48,10 @@ COPY init-cobaldmodules.sh /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh

RUN chmod 755 /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh

COPY start-drone /usr/local/bin/start-drone
COPY 28-sync-container-slurmd /etc/docker-init.d/28-sync-container-slurmd
RUN chmod 755 /usr/local/bin/start-drone /etc/docker-init.d/28-sync-container-slurmd

RUN echo -e "#!/bin/sh\npython3 -m cobald.daemon /etc/cobald/config.yaml" >> /etc/docker-init.d/70-cobald && chmod 755 /etc/docker-init.d/70-cobald

{% if cobald_docker_default_command | default(True) -%}
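The RUN line above appends a small init hook, so /etc/docker-init.d/70-cobald should end up containing just (assuming the image's /bin/sh echo honours -e):

    #!/bin/sh
    python3 -m cobald.daemon /etc/cobald/config.yaml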
@@ -8,7 +8,7 @@
    value: "15000"
    sysctl_file: /etc/sysctl.d/90-max_net_namespaces.conf

- name: "enable user thoto for fakeroot access"
- name: "enable user {{unpriv_user}} for fakeroot access"
  lineinfile:
    line: "{{unpriv_user}}:4294836224:65536"
    dest: "{{item}}"
@@ -9,3 +9,6 @@ RUN chmod 755 /etc/docker-init.d/30-slurmd
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"

RUN yum install -y singularity && \
    yum clean all && rm -rf /var/cache/yum
roles/slurm/tasks/host-config.yml (new file, +35)

@@ -0,0 +1,35 @@
# TODO: this does not work quite right since slurm-ctl does not reach the host
# system. sinfo, scontrol etc. work but srun does not!

- name: "get addresses from docker network"
  docker_network_info:
    name: "{{ docker_network }}"
  register: slurm_network_data

- name: link host slurm config
  file:
    path: "/etc/slurm/slurm.conf"
    src: "/container/volumes/slurm/slurm.conf"
    force: True
    state: link
    backup: True

- name: create slurm user
  user:
    name: slurm
    system: True

- name: place entry of slurm-ctl in host /etc/hosts
  lineinfile:
    line: "{{slurm_network_data.network.Containers | dict2items
      | json_query('[?value.Name==`slurm-ctl`].value.IPv4Address') | first
      | ipaddr('address') }}\tslurm-ctl"
    regexp: "^(\\S*)(\\s*)slurm-ctl$"
    path: /etc/hosts
    backup: True

- name: start munge locally
  service:
    name: munge
    enabled: True
    state: started
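To unpack the lineinfile expression above: docker_network_info returns the network inspect data, where Containers is keyed by container ID and each entry carries Name and a CIDR-style IPv4Address; the filter chain picks slurm-ctl's entry and ipaddr('address') strips the prefix length. A sketch with a purely illustrative address:

    # slurm_network_data.network.Containers (illustrative excerpt):
    #   "9f2c...":
    #     Name: slurm-ctl
    #     IPv4Address: 172.18.0.2/16
    # dict2items | json_query(...) | first  ->  "172.18.0.2/16"
    # ... | ipaddr('address')               ->  "172.18.0.2"
    # line managed in /etc/hosts:               "172.18.0.2\tslurm-ctl"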
@@ -58,6 +58,13 @@
    path: /container/volumes/slurm/
    state: directory

- name: "create docker network to make service discovery work"
  docker_network:
    name: "{{ docker_network }}"
    state: present
  register: slurm_network_data
  tags: slurm-config

- name: upload slurm config
  template:
    force: true
@@ -71,24 +78,25 @@
    slurm_exec_node_mem: 5000 # RealMemory=5964
    slurm_alloc_nodes_default:
      - name: "{{slurm_prefix+'-submit1'}}"
      - name: "{{ inventory_hostname }}"
        addr: "{{ slurm_network_data.network.IPAM.Config[0].Gateway }}"
    alloc_nodes: "{{ slurm_alloc_nodes_default + extra_nodes | default([])}}"
    partitions:
      - name: cobald
        nodeprefix: drone
        num_nodes: 10
        node_cores: 3
        node_mem: 4900
        port: 16818
        initstate: FUTURE
  notify: reconfigure slurm
  tags: slurm-config

- name: "create docker network to make service discovery work"
  docker_network:
    name: "{{ docker_network }}"
    state: present

- name: "create docker volume for shared access between nodes"
  docker_volume:
    name: slurm-shared
    state: present

# TODO: reserve some address using docker_network_info and assign as aux
# address to enable slurmctld to get a static address in order to be
# reachable from slurm running on docker host to enable submitting jobs.

- name: run slurm docker containers
  docker_container:
    name: "{{ slurm_prefix }}-{{ item.machine }}"
@@ -98,6 +106,7 @@
    ports: "{{ item.exposed_ports | default([]) }}"
    networks:
      - name: "{{ docker_network }}"
        aliases: "{{ item.aliases | default(omit) }}"
    env:
      slurmuser: "{{ slurm_user }}"
    image: "{{ item.image }}"
@@ -114,6 +123,10 @@
    label: "{{slurm_prefix}}-{{ item.machine }}"
  tags: slurm-config

- name: configure host system to integrate into slurm cluster
  import_tasks: host-config.yml
  when: slurm_hostsystem_cluster_access | default(False)

- name: export facts about slurm cluster to be used by other modules
  set_fact:
    slurm:
@@ -9,6 +9,8 @@ ControlMachine=slurm-ctl
AuthType=auth/munge
#CheckpointType=checkpoint/none
CryptoType=crypto/munge
CommunicationParameters=NoAddrCache
SlurmctldParameters=enable_configless
#DisableRootJobs=NO
#EnforcePartLimits=NO
#Epilog=
@@ -103,8 +105,10 @@ Waittime=0
#MaxMemPerCPU=0
#SchedulerTimeSlice=30
SchedulerType=sched/backfill
SelectType=select/linear
# SelectType=select/linear
SelectType=select/cons_res
#SelectTypeParameters=
SelectTypeParameters=CR_CORE
#
#
# JOB PRIORITY
@@ -166,12 +170,24 @@ SlurmSchedLogFile={{slurm_log_path_sched}}
NodeName=slurm-exec[1-{{ num_nodes }}] CPUs={{ slurm_exec_node_cores }} {{''
  }} RealMemory={{ slurm_exec_node_mem }} {{''
  }} CoresPerSocket={{ slurm_exec_node_cores }} State=UNKNOWN
# RealMemory=5964
{% for p in partitions | default([]) %}
NodeName={{ p.nodeprefix }}[1-{{ p.num_nodes }}] CPUs={{ p.node_cores }} {{''
  }} RealMemory={{ p.node_mem }} {{''
  }} CoresPerSocket={{ p.node_cores }} {{''
  }} {%- if p.port is defined %} Port={{ p.port}} {% endif %}{{''
  }} State={{ p.initstate | default('UNKNOWN') }}
{% endfor %}
{% for i in alloc_nodes -%}
NodeName={{i.name}}
{%- if i.hostname is defined %} NodeHostname={{i.hostname}} {% endif %}
{%- if i.addr is defined %} NodeAddr={{i.addr}} {% endif %}
State=UNKNOWN
{% endfor %}
PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] {{ ''
PartitionName=container Nodes=slurm-exec[1-{{num_nodes}}] {{ ''
  }} AllocNodes={{alloc_nodes |map(attribute='name') | join(',')}} {{ ''
  }} Default=YES MaxTime=INFINITE State=UP
{% for p in partitions | default([]) %}
PartitionName={{ p.name }} Nodes={{ p.nodeprefix }}[1-{{ p.num_nodes }}] {{ ''
  }} AllocNodes={{alloc_nodes |map(attribute='name') | join(',')}} {{ ''
  }} MaxTime=INFINITE State=UP
{% endfor %}
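With the cobald partition defined earlier in this changeset (nodeprefix: drone, num_nodes: 10, node_cores: 3, node_mem: 4900, port: 16818, initstate: FUTURE), the new loops should render roughly the lines below; AllocNodes is built from alloc_nodes (the playbook host plus any extra_nodes), so dockerhost is only a placeholder:

    NodeName=drone[1-10] CPUs=3 RealMemory=4900 CoresPerSocket=3 Port=16818 State=FUTURE
    PartitionName=cobald Nodes=drone[1-10] AllocNodes=dockerhost MaxTime=INFINITE State=UP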
@@ -10,7 +10,8 @@ slurm_nodes_exec: | # extend range to execute nodes list
  {% set slurm_nodes_exec = [] %}
  {% for i in range(1, num_nodes+1) -%}
  {% set _ = slurm_nodes_exec.extend([
      {'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
      {'machine':'exec%s'|format(i), 'image': 'slurm:slurmd',
       'aliases':['drone%s'|format(i)]}]) -%}
  {%- endfor %}
  {{ slurm_nodes_exec }}
slurm_default_mounts:
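For illustration, with num_nodes: 2 the changed expression now yields each exec container with a matching drone alias (presumably so the droneN node names resolve on the docker network):

    - machine: exec1
      image: slurm:slurmd
      aliases: [drone1]
    - machine: exec2
      image: slurm:slurmd
      aliases: [drone2]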