fix hostname of cobald slurm node
Made cobald able to run slurm jobs, which previously failed with permission denied.
This commit is contained in:
13
play.yml
13
play.yml
@@ -81,7 +81,18 @@
|
|||||||
dir: /var/lib/cobald
|
dir: /var/lib/cobald
|
||||||
num_nodes: "{{slurm_num_nodes}}"
|
num_nodes: "{{slurm_num_nodes}}"
|
||||||
extra_nodes:
|
extra_nodes:
|
||||||
- "{{cobald_container_hostname}}" # from cobald_facts, read or generated
|
- name: cobald
|
||||||
|
hostname: "{{cobald_container_hostname}}" # from cobald/facts.yml above
|
||||||
|
# hostname is used as NodeHostname, which is used by slurm's "networking
|
||||||
|
# code" (https://bugs.schedmd.com/show_bug.cgi?id=8615).
|
||||||
|
# It works either way around, but one of NodeName or NodeHostname has
|
||||||
|
# to match the container name (-n flag, not --hostname) since when
|
||||||
|
# submitting tasks to the slurm controller, it matches access
|
||||||
|
# permissions against a reverse lookup of the submitting ip address.
|
||||||
|
# Docker always and unconfigurably resolves the container ip in any
|
||||||
|
# network to containername.netname, where containername is the
|
||||||
|
# container's runtime name (not the supplied hostname!) and netname is
|
||||||
|
# the network name in the host environment. We should run our own dns...
|
||||||
docker_network: slurm
|
docker_network: slurm
|
||||||
when: '"slurm" in group_names'
|
when: '"slurm" in group_names'
|
||||||
tags: slurm
|
tags: slurm
|
||||||
|
@@ -118,9 +118,6 @@
|
|||||||
networks:
|
networks:
|
||||||
- name: "{{cobald_docker_network}}"
|
- name: "{{cobald_docker_network}}"
|
||||||
networks_cli_compatible: True
|
networks_cli_compatible: True
|
||||||
# env:
|
|
||||||
# slurmuser: "{{slurm_user}}"
|
|
||||||
# privileged: "{{ container_privileged | bool }}"
|
|
||||||
state: started
|
state: started
|
||||||
detach: True
|
detach: True
|
||||||
cleanup: True
|
cleanup: True
|
||||||
|
@@ -1,5 +1,5 @@
|
|||||||
cobald_image_tag: slurm
|
cobald_image_tag: slurm
|
||||||
cobald_docker_base_image: "slurm:slurmd"
|
cobald_docker_base_image: "{{slurm.base_image}}"
|
||||||
cobald_docker_default_command: False
|
cobald_docker_default_command: False
|
||||||
cobald_docker_network: "{{slurm.network}}"
|
cobald_docker_network: "{{slurm.network}}"
|
||||||
cobald_domainname: "{{slurm.domain}}"
|
cobald_domainname: "{{slurm.domain}}"
|
||||||
|
@@ -67,7 +67,9 @@
|
|||||||
- slurm.conf
|
- slurm.conf
|
||||||
- cgroup.conf
|
- cgroup.conf
|
||||||
vars:
|
vars:
|
||||||
alloc_nodes: "{{ [ slurm_prefix+'-submit1' ] + extra_nodes | default([])}}"
|
slurm_alloc_nodes_default:
|
||||||
|
- name: "{{slurm_prefix+'-submit1'}}"
|
||||||
|
alloc_nodes: "{{ slurm_alloc_nodes_default + extra_nodes | default([])}}"
|
||||||
notify: reconfigure slurm
|
notify: reconfigure slurm
|
||||||
tags: [ slurm-config ]
|
tags: [ slurm-config ]
|
||||||
|
|
||||||
|
@@ -165,6 +165,8 @@ SlurmSchedLogFile={{slurm_log_path_sched}}
|
|||||||
# COMPUTE NODES
|
# COMPUTE NODES
|
||||||
NodeName=slurm-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN
|
NodeName=slurm-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN
|
||||||
{% for i in alloc_nodes -%}
|
{% for i in alloc_nodes -%}
|
||||||
NodeName={{i}} State=UNKNOWN
|
NodeName={{i.name}}
|
||||||
|
{%- if i.hostname is defined %} NodeHostname={{i.hostname}} {% endif %}
|
||||||
|
State=UNKNOWN
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] AllocNodes={{alloc_nodes | join(',')}} Default=YES MaxTime=INFINITE State=UP
|
PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] AllocNodes={{alloc_nodes |map(attribute='name') | join(',')}} Default=YES MaxTime=INFINITE State=UP
|
||||||
|
Reference in New Issue
Block a user