Compare commits
3 Commits
574d2fcb4e
...
cobaldtard
Author | SHA1 | Date | |
---|---|---|---|
4c63f2a825
|
|||
51390bb321
|
|||
52022a3013
|
@@ -43,6 +43,7 @@
|
||||
# container's runtime name (not the hostname supplied!) and netname is
|
||||
# the network name in the host environment. We should run our own DNS...
|
||||
docker_network: slurm
|
||||
slurm_hostsystem_cluster_access: True
|
||||
when: '"slurm" in group_names'
|
||||
tags: slurm, cobald, influxdb, slurm-config
|
||||
# tags: cobald requires some slurm facts, so the cobald tag is included here
|
||||
|
31
roles/cobald/files/cobald-config/config.yaml
Normal file
31
roles/cobald/files/cobald-config/config.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
---
|
||||
pipeline:
|
||||
- __type__: cobald.controller.linear.LinearController
|
||||
low_utilisation: 0.9
|
||||
high_allocation: 0.9
|
||||
rate: 0.10
|
||||
- !Limiter
|
||||
minimum: 3
|
||||
- !TelegrafPipelineMonitor
|
||||
poll: True
|
||||
- !TardisPoolFactory
|
||||
configuration: /etc/cobald/tardis.yaml
|
||||
logging:
|
||||
version: 1
|
||||
root:
|
||||
level: DEBUG
|
||||
handlers: [console, file]
|
||||
handlers:
|
||||
console:
|
||||
class: logging.StreamHandler
|
||||
formatter: test
|
||||
level: DEBUG
|
||||
stream: ext://sys.stderr
|
||||
file:
|
||||
class: logging.handlers.RotatingFileHandler
|
||||
formatter: test
|
||||
level: WARNING
|
||||
filename: /var/log/cobald/cobald-tardis.log
|
||||
formatters:
|
||||
test:
|
||||
format: " %(name)s %(message)s"
|
41
roles/cobald/files/cobald-config/tardis.yaml
Normal file
41
roles/cobald/files/cobald-config/tardis.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
Plugins:
|
||||
SqliteRegistry:
|
||||
db_file: /tmp/drone_registry.db
|
||||
TelegrafMonitoring:
|
||||
host: ed-telegraf
|
||||
port: 8094
|
||||
#BatchSystem:
|
||||
# adapter: FakeBatchSystem
|
||||
# allocation: 1.0
|
||||
# utilisation: !PeriodicValue
|
||||
# period: 60
|
||||
# amplitude: 0.15
|
||||
# offset: 0.80
|
||||
## phase: 1.
|
||||
# phase: 1.6
|
||||
# machine_status: Available
|
||||
BatchSystem:
|
||||
adapter: Slurm
|
||||
max_age: 0.1
|
||||
options:
|
||||
partition: cobald
|
||||
Sites:
|
||||
- name: slurmtest
|
||||
adapter: Slurm
|
||||
quota: 20
|
||||
slurmtest:
|
||||
# executor: ...
|
||||
StatusUpdate: 0.1
|
||||
MachineTypes:
|
||||
- m1.a
|
||||
MachineTypeConfiguration:
|
||||
m1.a:
|
||||
Walltime: 5
|
||||
Partition: container
|
||||
StartupCommand: /usr/local/bin/start-drone
|
||||
# SubmitOptions: ...
|
||||
MachineMetaData:
|
||||
m1.a:
|
||||
Cores: 3 # cores
|
||||
Memory: 1 # GB
|
||||
Disk: 4 # not passed
|
@@ -10,15 +10,16 @@
|
||||
slurm_image_prefix: cobald
|
||||
image_name: "{{cobald_image_tag}}"
|
||||
dockerfile: "{{ lookup('template', 'cobald.Dockerfile') }}"
|
||||
files:
|
||||
- dest: cobald-entrypoint.sh
|
||||
content: "{{ lookup('file', 'cobald-entrypoint.sh') }}"
|
||||
- dest: init-cobaldmodules.sh
|
||||
content: "{{ lookup('file', 'init-cobaldmodules.sh') }}"
|
||||
- dest: start-drone
|
||||
content: "{{ lookup('file', 'start-drone') }}"
|
||||
- dest: 28-sync-container-slurmd
|
||||
content: "{{ lookup('file', '28-sync-container-slurmd') }}"
|
||||
files_list:
|
||||
- cobald-entrypoint.sh
|
||||
- init-cobaldmodules.sh
|
||||
- start-drone
|
||||
- 28-sync-container-slurmd
|
||||
files: "
|
||||
{%- set files = [] -%} {%- for i in files_list -%}
|
||||
{%- set files = files.append(
|
||||
{ 'dest': i, 'content': lookup('file', i) }) -%}
|
||||
{%- endfor %}{{ files }}"
|
||||
when: cobald_slurm | default(False)
|
||||
|
||||
- name: build generic cobald docker image
|
||||
@@ -32,13 +33,14 @@
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
|
||||
- name: copy cobald config (does nothing yet)
|
||||
- name: copy cobald config
|
||||
copy:
|
||||
src: cobald-config/
|
||||
dest: /container/volumes/cobald
|
||||
dest: "~{{unpriv_user}}/cobald/"
|
||||
force: False
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
when: False
|
||||
mode: "0644"
|
||||
|
||||
- name: ensure network for cobald container exists
|
||||
docker_network:
|
||||
@@ -68,7 +70,7 @@
|
||||
repo: https://github.com/thoto/cobald
|
||||
dest: "~{{unpriv_user}}/cobald-src"
|
||||
version: bugfix/mixed_construction_methods
|
||||
update: no
|
||||
update: no # FIXME
|
||||
become: yes
|
||||
become_user: "{{unpriv_user}}"
|
||||
register: cobald_git_pull
|
||||
@@ -78,6 +80,7 @@
|
||||
repo: https://github.com/MatterMiners/tardis
|
||||
dest: "~{{unpriv_user}}/tardis-src"
|
||||
version: master
|
||||
update: no # FIXME
|
||||
become: yes
|
||||
become_user: "{{unpriv_user}}"
|
||||
register: tardis_git_pull
|
||||
|
@@ -1,3 +1,6 @@
|
||||
# TODO: this does not work quite right since slurm-ctl does not reach the host
|
||||
# system. sinfo, scontrol etc. work but srun does not!
|
||||
|
||||
- name: "get addresses from docker network"
|
||||
docker_network_info:
|
||||
name: "{{ docker_network }}"
|
||||
@@ -28,4 +31,5 @@
|
||||
- name: start munge locally
|
||||
service:
|
||||
name: munge
|
||||
enabled: True
|
||||
state: started
|
||||
|
@@ -92,18 +92,11 @@
|
||||
notify: reconfigure slurm
|
||||
tags: slurm-config
|
||||
|
||||
- import_tasks: host-config.yml
|
||||
when: slurm_hostsystem_cluster_access | default(False)
|
||||
|
||||
- name: "create docker volume for shared access between nodes"
|
||||
docker_volume:
|
||||
name: slurm-shared
|
||||
state: present
|
||||
|
||||
# TODO: reserve an address using docker_network_info and assign it as an aux
|
||||
# address to enable slurmctld to get a static address in order to be
|
||||
# reachable from slurm running on docker host to enable submitting jobs.
|
||||
|
||||
- name: run slurm docker containers
|
||||
docker_container:
|
||||
name: "{{ slurm_prefix }}-{{ item.machine }}"
|
||||
@@ -130,6 +123,10 @@
|
||||
label: "{{slurm_prefix}}-{{ item.machine }}"
|
||||
tags: slurm-config
|
||||
|
||||
- name: configure host system to integrate into slurm cluster
|
||||
import_tasks: host-config.yml
|
||||
when: slurm_hostsystem_cluster_access | default(False)
|
||||
|
||||
- name: export facts about slurm cluster to be used by other modules
|
||||
set_fact:
|
||||
slurm:
|
||||
|
Reference in New Issue
Block a user