Compare commits

...

6 Commits

21 changed files with 219 additions and 238 deletions

View File

@@ -59,27 +59,35 @@
vars:
container_privileged: True
slurm_num_nodes: 10
roles:
tasks:
- name: "setup docker"
role: docker
import_role: name=docker
tags: docker
- name: "get facts from existing cobald instance (i.e. hostname)"
role: cobald_facts
import_role:
name: cobald
tasks_from: facts
vars:
container_name: cobald
tags: [ slurm, cobald ]
- name: "setup slurm test environment in docker containers"
role: slurm
import_role: name=slurm
vars:
slurm_user: slurm # or root
slurm_user_accounts:
- name: cobald
dir: /var/lib/cobald
num_nodes: "{{slurm_num_nodes}}"
extra_nodes:
- "{{cobald_container_hostname}}" # from cobald_facts, read or generated
docker_network: slurm
when: '"slurm" in group_names'
tags: slurm
- name: "install cobald"
role: cobald
import_role: name=cobald
vars:
cobald_slurm: True
container_name: cobald

View File

@@ -1,3 +1,4 @@
# Domain name used for the cobald setup.
cobald_domainname: 'cobald.local'

# InfluxDB admin account and organisation.
# NOTE(review): these look like placeholder values - confirm they are
# overridden (and the password kept out of version control) for real setups.
influx_admin_user: 'my-user'
influx_admin_pw: 'my-password'
influx_org: 'my-org'

View File

@@ -0,0 +1,34 @@
# Build the cobald docker image.
# The build context lives in /container/docker-images/cobald.<tag>/, where
# <tag> is cobald_image_tag (falls back to 'latest'). The image is only
# force-rebuilt when the Dockerfile or one of the copied scripts changed.

- name: create build directory for cobald docker image
  file:
    path: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/"
    state: directory
    owner: "{{unpriv_user}}"
    group: docker

- name: render cobald Dockerfile into build directory
  template:
    src: cobald.Dockerfile
    dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/Dockerfile"
    owner: "{{unpriv_user}}"
    group: docker
  register: cobald_cp_dockerfile

- name: copy cobald entrypoint and module init scripts
  copy:
    src: "{{item}}"
    dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/{{item}}"
    owner: "{{unpriv_user}}"
    group: docker
    # quoted so the mode is always read as an octal string, never as a number
    mode: "0755"
  loop:
    - cobald-entrypoint.sh
    - init-cobaldmodules.sh
  register: cobald_cp_files

- name: build cobald docker image
  docker_image:
    name: "cobald"
    tag: "{{cobald_image_tag|default('latest')}}"
    # pull: False
    build:
      pull: False
      path: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/"
    source: build
    # rebuild only if something in the build context was modified above
    force_source: "{{cobald_cp_dockerfile.changed or cobald_cp_files.changed}}"

View File

@@ -23,11 +23,11 @@
cleanup: True
networks_cli_compatible: True
- add_host:
- name: add ansible connection to influxdb container
add_host:
name: ed-influxdb
ansible_connection: docker
# ansible_docker_extra_args: "-H=ssh://ed-c7-1.virt.magni.thoto.net" # FIXME
ansible_docker_extra_args: "-H=ssh://{{ansible_host}}" # FIXME
ansible_docker_extra_args: "-H=ssh://{{ansible_host}}"
changed_when: False
- name: wait for influx to run
@@ -35,32 +35,33 @@
changed_when: False
delegate_to: ed-influxdb
- name: fetch auth token
- name: fetch influxdb auth token
raw: influx auth list --user my-user --hide-headers --json
register: influx_token_fetch
changed_when: False
delegate_to: ed-influxdb
- name: set admin token
- name: set influxdb admin token
set_fact:
influx_admin_token:
"{{(influx_token_fetch.stdout | from_json | first).token}}"
- name: create bucket for cobald
- name: create influxdb bucket for cobald
influx_bucket:
base: "http://localhost:{{influx_pubport}}"
org: "my-org"
auth_token: "{{influx_admin_token}}"
name: "{{influx_bucket}}"
- name: create dashboard
- name: create influxdb dashboard
influx_dashboard:
base: "http://localhost:{{influx_pubport}}"
org: "my-org"
auth_token: "{{influx_admin_token}}"
data: "{{lookup('file', 'influxdb-dashboard-cobald.json')}}"
when: influxdb_dashboard | default(True)
- name: create token for telegraf
- name: create influxdb write access token for telegraf
influx_token:
base: "http://localhost:{{influx_pubport}}"
org: "my-org"

View File

@@ -1,62 +1,54 @@
- include_vars: cobald-slurm.yml
when: cobald_slurm | default(False)
- file:
path: "/container/{{item.name}}/cobald{{item.pfx|default('')}}/"
- name: build cobald:slurm docker image
include_role:
name: slurm
tasks_from: dockerimage
vars:
slurm_image_prefix: cobald
image_name: "{{cobald_image_tag}}"
dockerfile: "{{ lookup('template', 'cobald.Dockerfile') }}"
files:
- dest: cobald-entrypoint.sh
content: "{{ lookup('file', 'cobald-entrypoint.sh') }}"
- dest: init-cobaldmodules.sh
content: "{{ lookup('file', 'init-cobaldmodules.sh') }}"
when: cobald_slurm | default(False)
- name: build generic cobald docker image
include_tasks: dockerimage-generic.yml
when: not (cobald_slurm | default(False))
- name: make cobald data volume
file:
path: "/container/volumes/cobald/"
state: directory
owner: "{{unpriv_user}}"
group: docker
loop:
- name: docker-images
pfx: ".{{cobald_image_tag|default('latest')}}"
- name: volumes
- template:
src: cobald.Dockerfile
dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/Dockerfile"
owner: "{{unpriv_user}}"
group: docker
register: cobald_cp_dockerfile
- copy:
src: "{{item}}"
dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/{{item}}"
owner: "{{unpriv_user}}"
group: docker
mode: 0755
with_items:
- cobald-entrypoint.sh
- init-cobaldmodules.sh
register: cobald_cp_files
- docker_image:
name: "cobald"
tag: "{{cobald_image_tag|default('latest')}}"
# pull: False
build:
pull: False
path: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/"
source: build
force_source: "{{cobald_cp_dockerfile.changed or cobald_cp_files.changed}}"
- copy:
- name: copy cobald config (does nothing yet)
copy:
src: cobald-config/
dest: /container/volumes/cobald
owner: "{{unpriv_user}}"
group: docker
when: False
- docker_network:
name: "{{cobald_docker_network}}" # FIXME
- name: ensure network for cobald container exists
docker_network:
name: "{{cobald_docker_network}}"
state: present
# docker run -v $(pwd)/cobald-config-host:/etc/cobald -v $(pwd)/cobald:/cobald --rm -it cobald bash
- yum:
- name: install git
yum:
name: git
state: present
- file:
- name: make directories for cobald configuration and modules
file:
path: "{{item}}"
owner: "{{unpriv_user}}"
group: "{{unpriv_user}}"
@@ -66,7 +58,8 @@
- "~{{unpriv_user}}/cobald/modules"
- "~{{unpriv_user}}/cobald"
- git:
- name: clone cobald code from git
git:
repo: https://github.com/thoto/cobald
dest: "~{{unpriv_user}}/cobald-src"
version: bugfix/mixed_construction_methods
@@ -75,7 +68,8 @@
become_user: "{{unpriv_user}}"
register: cobald_git_pull
- git:
- name: clone tardis code from git
git:
repo: https://github.com/MatterMiners/tardis
dest: "~{{unpriv_user}}/tardis-src"
version: master
@@ -88,7 +82,7 @@
database: passwd
key: "{{unpriv_user}}"
- name: run pip install
- name: run pip install on cobald and tardis
docker_container:
image: "cobald:{{cobald_image_tag|default('latest')}}"
name: "cobald-src-{{item.name}}-install"
@@ -111,8 +105,7 @@
- import_tasks: telegraf.yml
- name: get cobald hostname
include_role:
name: cobald_facts
include_tasks: facts.yml
when: cobald_container_hostname is not defined
- name: run cobald container

View File

@@ -1,4 +1,5 @@
- file:
- name: setup directories for telegraf
file:
path: "/container/{{item}}/telegraf/"
state: directory
owner: "{{unpriv_user}}"
@@ -7,14 +8,16 @@
- docker-images
- volumes
- copy:
- name: copy telegraf Dockerfile
copy:
src: telegraf.Dockerfile
dest: /container/docker-images/telegraf/Dockerfile
owner: "{{unpriv_user}}"
group: docker
register: cobald_cp_telegraf_dockerfile
- copy: # telegraf is found in influxdb repo
- name: copy telegraf repo file
copy: # telegraf is found in influxdb repo
src: influxdb.repo
dest: /container/docker-images/telegraf/influxdb.repo
owner: "{{unpriv_user}}"
@@ -29,20 +32,9 @@
source: build
force_source: "{{cobald_cp_telegraf_dockerfile.changed}}"
- set_fact:
influx_hostname: "ed-influxdb"
influx_domainname: "cobald.local"
influx_bucket: batleth
tags: influxdb
- import_tasks: influxdb.yml
tags: influxdb
- set_fact:
# influx_url: "http://{{influx_hostname}}.{{influx_domainname}}:8086"
influx_url: "http://{{influx_hostname}}:8086"
tags: influxdb
- name: generate telegraf config
template:
src: telegraf.conf.j2
@@ -51,6 +43,7 @@
group: docker
vars:
influx_token: "{{influx_telegraf_token.token}}"
influx_url: "http://{{influx_hostname}}:8086"
register: telegraf_config_gen
- name: run telegraf container
@@ -70,4 +63,6 @@
networks_cli_compatible: True
- import_tasks: grafana.yml
vars:
influx_url: "http://{{influx_hostname}}:8086"
tags: influxdb

View File

@@ -11,7 +11,8 @@ RUN git clone $REPOCOBALD /usr/local/src/cobald && \
git clone $REPOTARDIS /usr/local/src/tardis
RUN mkdir /etc/cobald /var/log/cobald && \
useradd -m -d /var/lib/cobald --no-log-init --system cobald && \
( getent passwd cobald > /dev/null || \
useradd -m -d /var/lib/cobald --no-log-init --system cobald ) && \
chown cobald:cobald /var/log/cobald
#RUN mkdir /cobald && python3 -m venv /cobald && source /cobald/bin/activate &&\

View File

@@ -1,5 +1,5 @@
cobald_image_tag: slurm
cobald_docker_base_image: "{{slurm.base_image}}"
cobald_docker_base_image: "slurm:slurmd"
cobald_docker_default_command: False
cobald_docker_network: "{{slurm.network}}"
cobald_domainname: "{{slurm.domain}}"

View File

@@ -1 +1,3 @@
# The cobald container network takes its name from docker_network.
cobald_docker_network: '{{docker_network}}'

# InfluxDB naming: the domain follows cobald_domainname, the host name is fixed.
influx_domainname: '{{ cobald_domainname }}'
influx_hostname: 'ed-influxdb'

View File

@@ -1,8 +1,9 @@
container_privileged: False
slurm_user: slurm
slurm_log_path_ctld: /var/log/slurm/slurmctld.log
slurm_log_path_d: /var/log/slurm/slurmd.log
slurm_log_path_sched: /var/log/slurm/slurmsched.log
slurm_prefix: slurm
slurm_domain: slurm.local
container_privileged: False
docker_network: slurm
slurm_image_prefix: slurm

View File

@@ -28,4 +28,8 @@ RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
COPY start-scripts/10-munge /etc/docker-init.d/10-munge
RUN chmod 755 /etc/docker-init.d/10-munge
ARG moreusers
RUN function mu { [ -z "$1" ] || useradd -d $2 -m --no-log-init --system $1 ;};\
echo "${moreusers}" | tr ',' '\n' | while read i ; do mu $i ; done
CMD /usr/local/sbin/docker-init

View File

@@ -1,50 +1,29 @@
- file:
path: "/container/docker-images/slurm"
state: directory
owner: "{{unpriv_user}}"
group: docker
- copy: # FIXME: template
src: "{{image.name}}.Dockerfile"
dest: "/container/docker-images/slurm/{{image.name}}.Dockerfile"
owner: "{{unpriv_user}}"
group: docker
register: slurm_cp_dockerfile
- name: copy entrypoint and docker-init
copy: # FIXME: swap out
src: "{{item}}"
dest: "/container/docker-images/slurm/{{item}}"
owner: root
group: root
mode: u=rwx,g=rx,o=rx
loop:
- entrypoint.sh
- docker-init
register: slurm_cp_entrypt
- name: copy startup scripts
copy:
src: "start-scripts/"
dest: "/container/docker-images/slurm/start-scripts/"
owner: root
group: root
mode: u=rwx,g=rx,o=rx
register: slurm_cp_stscrs
- name: build slurm base docker image
include_tasks: dockerimage_build.yml
vars:
slurm_image_prefix: "{{slurm_base_image_prefix | default('slurm') }}"
image_name: base
dockerfile: "{{lookup('file', 'slurm-base.Dockerfile')}}"
files:
- dest: entrypoint.sh
content: "{{ lookup('file', 'entrypoint.sh') }}"
- dest: entry-munge.sh
content: "{{ lookup('file', 'entry-munge.sh') }}"
- dest: docker-init
content: "{{ lookup('file', 'docker-init') }}"
- dest: start-scripts/10-munge
content: "{{ lookup('file', 'start-scripts/10-munge') }}"
image_args:
moreusers: >-
{% for a in slurm_user_accounts | default([]) -%}
{{a['name']}} {{a['dir']}}{{loop.last | ternary('',',')}}
{%- endfor %}
when: not slurm_baseimg_build_chg | default(False)
- set_fact:
slurm_image_files_changed: "{{ (slurm_image_files_changed | default(False))
or slurm_cp_entrypt.changed or slurm_cp_stscrs.changed }}"
- docker_image:
name: "slurm"
tag: "{{image.name}}"
# pull: False
build:
pull: False
path: "/container/docker-images/slurm/"
dockerfile: "{{image.name}}.Dockerfile"
# target: "{{image.name}}" # unsupported on old docker-py version as in el7
source: build
force_source: "{{slurm_cp_dockerfile.changed or slurm_image_files_changed}}"
slurm_baseimg_build_chg:
"{{(slurm_baseimg_build_chg | default(False)) or
slurm_img_build.changed}}"
- name: "build slurm base docker image {{image_name}}"
include_tasks: dockerimage_build.yml

View File

@@ -0,0 +1,43 @@
# Generic docker image build.
# Variables read by these tasks:
#   slurm_image_prefix  image name; also first part of the build directory name
#   image_name          image tag; also second part of the build directory name
#   dockerfile          Dockerfile content (written verbatim)
#   files               optional list of {dest, content} written into the
#                       build directory
#   image_args          optional build args handed to docker_image
#   slurm_baseimg_build_chg  optional flag forcing a rebuild
# Registers slurm_cp_dockerfile, slurm_cp_files and slurm_img_build.

- name: create directories for docker image build
  file:
    path: "/container/docker-images/{{slurm_image_prefix}}-{{image_name}}/{{item}}"
    state: directory
    owner: "{{unpriv_user}}"
    group: docker
  # '' is the build directory itself; the rest are the distinct non-empty
  # parent directories of all file destinations. `files` is defaulted to an
  # empty list here, exactly as in the copy task below.
  loop: >-
    {{ [''] + (files | default([]) | map(attribute='dest') | map('dirname') |
    unique | select | list) }}

- name: "copy Dockerfile {{slurm_image_prefix}}:{{image_name}}"
  copy:
    content: "{{dockerfile}}"
    dest: "/container/docker-images/{{slurm_image_prefix}}-{{image_name}}/Dockerfile"
    owner: "{{unpriv_user}}"
    group: docker
  register: slurm_cp_dockerfile

- name: copy requisite files
  copy:
    content: "{{ item.content }}"
    dest: "/container/docker-images/{{slurm_image_prefix}}-{{image_name}}/{{item.dest}}"
    owner: root
    group: root
    mode: u=rwx,g=rx,o=rx
  loop: "{{ files | default([]) }}"
  loop_control:
    # show only the destination instead of the whole file content
    label: "{{ item.dest }}"
  register: slurm_cp_files

- name: "build docker image {{slurm_image_prefix}}:{{image_name}}"
  docker_image:
    name: "{{slurm_image_prefix}}"
    tag: "{{image_name}}"
    # pull: False
    build:
      args: "{{image_args | default(omit)}}"
      pull: False
      path: "/container/docker-images/{{slurm_image_prefix}}-{{image_name}}/"
    source: build
    # rebuild if the Dockerfile or any requisite file changed, or if the
    # slurm_baseimg_build_chg flag is set
    force_source: >-
      {{slurm_cp_dockerfile.changed or
      slurm_cp_files.changed or
      slurm_baseimg_build_chg | default(False) }}
  register: slurm_img_build

View File

@@ -3,9 +3,10 @@
name: [ slurm, slurm-doc ]
state: present
- include_role:
name: slurm_dockerimage
loop: # FIXME: default(omit)!
- name: build docker images for slurm
include_tasks:
file: dockerimage.yml
loop:
- name: slurmctld
dockerfile: "{{ lookup('file', 'slurmctld.Dockerfile') }}"
files:
@@ -16,6 +17,10 @@
files:
- dest: start-scripts/30-slurmd
content: "{{ lookup('file', 'start-scripts/30-slurmd') }}"
vars:
image_name: "{{image.name | default(omit) }}"
dockerfile: "{{image.dockerfile | default(omit) }}"
files: "{{image.files | default(omit) }}"
loop_control:
loop_var: image
label: "{{ image.name }}"
@@ -32,7 +37,8 @@
group: munge
mode: u=rw,g=,o=
- file:
- name: create munge key directory for containers
file:
path: /container/volumes/munge
state: directory
owner: munge
@@ -47,7 +53,8 @@
src: /etc/munge/munge.key
dest: /container/volumes/munge/munge.key
- file:
- name: make slurm directory
file:
path: /container/volumes/slurm/
state: directory
@@ -74,29 +81,6 @@
name: slurm-shared
state: present
- set_fact:
slurm_nodes_std: # default nodes: controller and submit machine
- machine: ctl
image: slurm:slurmctld
exposed_ports: [ "6817:6817/tcp" ]
- machine: submit1
image: slurm:slurmd
extra_mounts:
- "/home/{{unpriv_user}}/job3/:/mnt/:rw"
slurm_nodes_exec: | # extend range to execute nodes list
{% set slurm_nodes_exec = slurm_nodes_exec | default([]) %}
{% for i in range(1, num_nodes+1) -%}
{% set _ = slurm_nodes_exec.extend([
{'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
{%- endfor %}
{{ slurm_nodes_exec }}
slurm_default_mounts:
- /container/volumes/slurm/:/etc/slurm/:rw
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
- slurm-shared:/shared/:rw
slurm_network: "{{docker_network}}"
tags: [ slurm-config ]
# TODO: reserve some address using docker_network_info and assign as aux
# address to enable slurmctld to get a static address in order to be
# reachable from slurm running on docker host to enable submitting jobs.
@@ -109,7 +93,7 @@
volumes: "{{ slurm_default_mounts + ( item.extra_mounts | default([]) ) }}"
ports: "{{ item.exposed_ports | default([]) }}"
networks:
- name: "{{ slurm_network }}"
- name: "{{ docker_network }}"
env:
slurmuser: "{{ slurm_user }}"
image: "{{ item.image }}"
@@ -119,14 +103,14 @@
privileged: "{{ container_privileged | bool }}"
networks_cli_compatible: True
interactive: True
vars:
vars: # see vars/main.yml
slurm_nodes_all: "{{ slurm_nodes_exec + slurm_nodes_std }}"
loop: "{{ slurm_nodes_all }}"
loop_control:
label: "{{slurm_prefix}}-{{ item.machine }}"
tags: [ slurm-config ]
- name: set facts to be used by other modules
- name: export facts about slurm cluster to be used by other modules
set_fact:
slurm:
user: "{{slurm_user}}"
@@ -134,3 +118,4 @@
base_image: "slurm:base"
mounts: "{{slurm_default_mounts}}"
network: "{{docker_network}}"
tags: always

20
roles/slurm/vars/main.yml Normal file
View File

@@ -0,0 +1,20 @@
# default nodes: controller and submit machine
slurm_nodes_std:
  - machine: ctl
    image: "slurm:slurmctld"
    exposed_ports: ["6817:6817/tcp"]
  - machine: submit1
    image: "slurm:slurmd"
    extra_mounts:
      - "/home/{{unpriv_user}}/job3/:/mnt/:rw"

# extend range to execute nodes list: one entry exec1..execN, N = num_nodes.
# num_nodes is cast with `int` so that a value arriving as a string
# (e.g. from key=value extra vars) does not break the range arithmetic.
slurm_nodes_exec: |
  {% set slurm_nodes_exec = [] %}
  {% for i in range(1, (num_nodes | int) + 1) -%}
  {% set _ = slurm_nodes_exec.extend([
  {'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
  {%- endfor %}
  {{ slurm_nodes_exec }}

# mounts listed here: slurm config, munge key and the shared volume
slurm_default_mounts:
  - /container/volumes/slurm/:/etc/slurm/:rw
  - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
  - slurm-shared:/shared/:rw

View File

@@ -1,40 +0,0 @@
# Per-image build for slurm:<image.name>.
# Reads `image` (name, files) and slurm_baseimg_build_chg; the build context
# is /container/docker-images/slurm-<image.name>/.

- name: create directories for docker image build
  file:
    state: directory
    path: "/container/docker-images/slurm-{{image.name}}/{{item}}"
    owner: "{{unpriv_user}}"
    group: docker
  # '' is the build directory itself, followed by every distinct non-empty
  # parent directory of the file destinations
  loop: >-
    {{ [''] + (image.files | map(attribute='dest') | map('dirname') |
    unique | select | list) }}

- name: copy Dockerfile
  register: slurm_cp_dockerfile
  copy:
    src: "{{image.name}}.Dockerfile"
    dest: "/container/docker-images/slurm-{{image.name}}/Dockerfile"
    owner: "{{unpriv_user}}"
    group: docker

- name: copy requisite files
  register: slurm_cp_files
  copy:
    content: "{{ item.content }}"
    dest: "/container/docker-images/slurm-{{image.name}}/{{item.dest}}"
    owner: root
    group: root
    mode: u=rwx,g=rx,o=rx
  loop: "{{ image.files | default([]) }}"
  loop_control:
    # print just the destination rather than the full file content
    label: "{{ item.dest }}"

- docker_image:
    name: "slurm"
    tag: "{{image.name}}"
    # pull: False
    source: build
    build:
      pull: False
      path: "/container/docker-images/slurm-{{image.name}}/"
    # rebuild when the Dockerfile or a requisite file changed, or when
    # slurm_baseimg_build_chg is set
    force_source: >-
      {{slurm_cp_dockerfile.changed or
      slurm_cp_files.changed or
      slurm_baseimg_build_chg }}

View File

@@ -1,46 +0,0 @@
# Build the slurm:base image, record whether the build changed in
# slurm_baseimg_build_chg, then include dockerimage.yml.

- file:
    state: directory
    path: "/container/docker-images/slurm-base/start-scripts"
    owner: "{{unpriv_user}}"
    group: docker

# - name: copy Dockerfile, entrypoint, docker-init and munge startup
- name: copy slurm base image requisite files
  register: slurm_baseimg_copy
  when: not (slurm_baseimg_build_chg | default(False))
  copy: # FIXME: swap out
    src: "{{item.file}}"
    dest: "/container/docker-images/slurm-base/{{item.file}}"
    owner: "{{unpriv_user}}"
    group: docker
    # items without explicit perms are copied with u=rwx,g=rx,o=rx
    mode: "{{ item.perms | default('u=rwx,g=rx,o=rx') }}"
  loop:
    - file: slurm-base.Dockerfile
      perms: u=rw,g=r,o=r
    - file: entrypoint.sh
    - file: entry-munge.sh
    - file: docker-init
    - file: start-scripts/10-munge

- name: build base image
  register: slurm_baseimg_build
  # when: run only once but keep changed state
  when: not (slurm_baseimg_build_chg | default(False))
  docker_image:
    name: "slurm"
    tag: "base"
    # pull: False
    source: build
    build:
      pull: False
      path: "/container/docker-images/slurm-base/"
      dockerfile: "slurm-base.Dockerfile"
      # target: "{{image.name}}" # unsupported on old docker-py version as in el7
    force_source: "{{slurm_baseimg_copy.changed}}"

# once set, the flag stays true: previous value OR result of this build
- set_fact:
    slurm_baseimg_build_chg: >-
      {{(slurm_baseimg_build_chg | default(False)) or
      slurm_baseimg_build.changed}}

- include_tasks: dockerimage.yml