Compare commits
5 Commits
962d9b5ac9
...
e78e184375
Author | SHA1 | Date | |
---|---|---|---|
e78e184375
|
|||
02e87d7c40
|
|||
4450c9bb65
|
|||
6eb6984d6a
|
|||
cc43a39ea3 |
29
play.yml
29
play.yml
@@ -1,6 +1,8 @@
|
||||
---
|
||||
- hosts: all
|
||||
vars_files: ['vars-auth.yml']
|
||||
vars_files:
|
||||
- vars-auth.yml
|
||||
- vars-influx.yml
|
||||
tasks:
|
||||
- name: "install epel repo" # for htop etc.
|
||||
yum:
|
||||
@@ -53,27 +55,34 @@
|
||||
- htcondor-containered
|
||||
- htcondor
|
||||
|
||||
- hosts: slurm
|
||||
- hosts: slurm, cobald
|
||||
vars:
|
||||
container_privileged: True
|
||||
num_nodes: 3
|
||||
slurm_num_nodes: 10
|
||||
roles:
|
||||
- name: "setup docker"
|
||||
role: docker
|
||||
tags: docker
|
||||
- name: "get facts from existing cobald instance (i.e. hostname)"
|
||||
role: cobald_facts
|
||||
vars:
|
||||
container_name: cobald
|
||||
tags: [ slurm, cobald ]
|
||||
- name: "setup slurm test environment in docker containers"
|
||||
role: slurm
|
||||
vars:
|
||||
slurm_user: slurm # or root
|
||||
num_nodes: "{{slurm_num_nodes}}"
|
||||
extra_nodes:
|
||||
- "{{cobald_container_hostname}}" # from cobald_facts, read or generated
|
||||
docker_network: slurm
|
||||
when: '"slurm" in group_names'
|
||||
tags: slurm
|
||||
|
||||
- hosts: cobald
|
||||
roles:
|
||||
- name: "setup docker"
|
||||
role: docker
|
||||
tags: docker
|
||||
- name: "install cobald"
|
||||
role: cobald
|
||||
vars:
|
||||
docker_network: slurm
|
||||
cobald_slurm: True
|
||||
container_name: cobald
|
||||
# docker_network: slurm # overridden by vars/slurm.yml
|
||||
when: '"cobald" in group_names'
|
||||
tags: cobald
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
[ -f /usr/local/lib/cobaldmodules/setup.py -a \
|
||||
-d /usr/local/lib/cobaldmodules/cobaldmodules ] && \
|
||||
pip3 install --no-deps --editable /usr/local/lib/cobaldmodules
|
||||
for i in /usr/local/lib/entrypoints.d/* ; do
|
||||
# Run each entrypoint fragment in order; stop at the first entry that is not a
# regular file or whose script fails. The path is quoted so names containing
# whitespace or glob characters are tested and executed intact.
[ -f "$i" ] && /bin/sh "$i" || break
|
||||
done
|
||||
|
||||
exec "${@:-/bin/bash}"
|
||||
|
@@ -16,7 +16,7 @@
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": 1,
|
||||
"iteration": 1623184036048,
|
||||
"iteration": 1623317629899,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
@@ -99,7 +99,7 @@
|
||||
],
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "nodes = from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> keep(columns: [\"tardis_machine_name\", \"_time\"])\n |> sort(columns: [\"_time\"], desc: true)\n |> unique(column: \"tardis_machine_name\")\n// |> yield()\n\noffset = from(bucket: \"batleth\")\n |> range(start: 0, stop: v.timeRangeStart)\n |> filter(fn: (r) => r._field == \"state\")\n |> group(columns: [\"tardis_machine_name\", \"machine_type\"])\n |> reduce(fn: (r, accumulator) => ({\n _value: accumulator._value + (\n if r._value == \"AvailableState\" then 1 \n else if r._value == \"DownState\" then -1 \n else 0)\n }), identity: {_value: 0})\n |> duplicate(column: \"_stop\", as: \"_time\")\n\nnew = from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._field == \"state\")\n// |> filter(fn: (r) => r.tardis_machine_name == \"${machine}\")\n |> group(columns: [\"tardis_machine_name\", \"machine_type\"])\n |> window(every: $__interval)\n |> reduce(fn: (r, accumulator) => ({\n _value: accumulator._value + (\n if r._value == \"AvailableState\" then 1 \n else if r._value == \"DownState\" then -1 \n else 0)\n }), identity: {_value: 0})\n |> duplicate(column: \"_stop\", as: \"_time\")\n\nunion(tables: [offset, new])\n |> window(every: inf)\n |> cumulativeSum()\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> yield()",
|
||||
"query": "f_r = (r, accumulator) => ({\n _value: accumulator._value + (\n if r._value == \"AvailableState\" then 1\n else if r._value == \"DownState\" then -1\n else 0)\n })\n\nnodes = from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> keep(columns: [\"tardis_machine_name\", \"_time\"])\n |> sort(columns: [\"_time\"], desc: true)\n |> unique(column: \"tardis_machine_name\")\n// |> yield()\n\noffset = from(bucket: \"batleth\")\n |> range(start: 0, stop: v.timeRangeStart)\n |> filter(fn: (r) => r._field == \"state\")\n |> group(columns: [\"tardis_machine_name\", \"machine_type\"])\n |> reduce(fn: f_r, identity: {_value: 0})\n |> duplicate(column: \"_stop\", as: \"_time\")\n\nnew = from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._field == \"state\")\n// |> filter(fn: (r) => r.tardis_machine_name == \"${machine}\")\n |> group(columns: [\"tardis_machine_name\", \"machine_type\"])\n |> window(every: $__interval)\n |> reduce(fn: f_r, identity: {_value: 0})\n |> duplicate(column: \"_stop\", as: \"_time\")\n\nunion(tables: [offset, new])\n |> window(every: inf)\n |> cumulativeSum()\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> yield()\n",
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
@@ -550,10 +550,9 @@
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"isNone": true,
|
||||
"selected": false,
|
||||
"text": "None",
|
||||
"value": ""
|
||||
"text": "cobald-xvmcqc",
|
||||
"value": "cobald-xvmcqc"
|
||||
},
|
||||
"datasource": "InfluxDB",
|
||||
"definition": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> keep(columns: [\"tardis_machine_name\", \"_time\"])\n |> sort(columns: [\"_time\"], desc: true)\n |> unique(column: \"tardis_machine_name\")",
|
||||
@@ -586,5 +585,5 @@
|
||||
"timezone": "",
|
||||
"title": "cobald",
|
||||
"uid": "urDuvE6Gk",
|
||||
"version": 9
|
||||
"version": 2
|
||||
}
|
5
roles/cobald/files/init-cobaldmodules.sh
Normal file
5
roles/cobald/files/init-cobaldmodules.sh
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
|
||||
[ -f /usr/local/lib/cobaldmodules/setup.py -a \
|
||||
-d /usr/local/lib/cobaldmodules/cobaldmodules ] && \
|
||||
pip3 install --no-deps --editable /usr/local/lib/cobaldmodules
|
@@ -1,33 +1,41 @@
|
||||
- include_vars: cobald-slurm.yml
|
||||
when: cobald_slurm | default(False)
|
||||
|
||||
- file:
|
||||
path: "/container/{{item}}/cobald/"
|
||||
path: "/container/{{item.name}}/cobald{{item.pfx|default('')}}/"
|
||||
state: directory
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
loop:
|
||||
- docker-images
|
||||
- volumes
|
||||
- name: docker-images
|
||||
pfx: ".{{cobald_image_tag|default('latest')}}"
|
||||
- name: volumes
|
||||
|
||||
- copy:
|
||||
- template:
|
||||
src: cobald.Dockerfile
|
||||
dest: /container/docker-images/cobald/Dockerfile
|
||||
dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/Dockerfile"
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
register: cobald_cp_dockerfile
|
||||
|
||||
- copy:
|
||||
src: cobald-entrypoint.sh
|
||||
dest: /container/docker-images/cobald/cobald-entrypoint.sh
|
||||
src: "{{item}}"
|
||||
dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/{{item}}"
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
# Quoted so the permission bits are passed as an octal string instead of
# relying on the YAML parser's implicit integer conversion.
mode: "0755"
|
||||
with_items:
|
||||
- cobald-entrypoint.sh
|
||||
- init-cobaldmodules.sh
|
||||
register: cobald_cp_files
|
||||
|
||||
- docker_image:
|
||||
name: "cobald"
|
||||
tag: "{{cobald_image_tag|default('latest')}}"
|
||||
# pull: False
|
||||
build:
|
||||
pull: False
|
||||
path: "/container/docker-images/cobald/"
|
||||
path: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/"
|
||||
source: build
|
||||
force_source: "{{cobald_cp_dockerfile.changed or cobald_cp_files.changed}}"
|
||||
|
||||
@@ -39,7 +47,7 @@
|
||||
when: False
|
||||
|
||||
- docker_network:
|
||||
name: "{{docker_network}}"
|
||||
name: "{{cobald_docker_network}}" # FIXME
|
||||
state: present
|
||||
|
||||
# docker run -v $(pwd)/cobald-config-host:/etc/cobald -v $(pwd)/cobald:/cobald --rm -it cobald bash
|
||||
@@ -82,7 +90,7 @@
|
||||
|
||||
- name: run pip install
|
||||
docker_container:
|
||||
image: cobald
|
||||
image: "cobald:{{cobald_image_tag|default('latest')}}"
|
||||
name: "cobald-src-{{item.name}}-install"
|
||||
volumes:
|
||||
- "~{{unpriv_user}}/{{item.name}}-src:/usr/local/src/{{item.name}}:rw"
|
||||
@@ -102,29 +110,34 @@
|
||||
|
||||
- import_tasks: telegraf.yml
|
||||
|
||||
- docker_container_info:
|
||||
name: cobald
|
||||
register: cobald_container_info
|
||||
- name: get cobald hostname
|
||||
include_role:
|
||||
name: cobald_facts
|
||||
when: cobald_container_hostname is not defined
|
||||
|
||||
- docker_container:
|
||||
name: cobald
|
||||
image: cobald
|
||||
hostname: |-
|
||||
{{cobald_container_info.container.Config.Hostname | default('cobald-'+
|
||||
lookup('password', '/dev/null chars=ascii_lowercase length=6')) }}
|
||||
domainname: cobald.local
|
||||
volumes:
|
||||
- name: run cobald container
|
||||
docker_container:
|
||||
name: "{{ container_name | default('cobald') }}"
|
||||
image: "cobald:{{cobald_image_tag|default('latest')}}"
|
||||
hostname: "{{cobald_container_hostname}}"
|
||||
domainname: "{{ cobald_domainname | default('cobald.local')}}"
|
||||
volumes: "{{default_mounts + cobald_mounts }}"
|
||||
networks:
|
||||
- name: "{{cobald_docker_network}}"
|
||||
networks_cli_compatible: True
|
||||
# env:
|
||||
# slurmuser: "{{slurm_user}}"
|
||||
# privileged: "{{ container_privileged | bool }}"
|
||||
state: started
|
||||
detach: True
|
||||
cleanup: True
|
||||
interactive: True
|
||||
# command: python3 -m cobald.daemon /etc/cobald/config.yaml
|
||||
vars:
|
||||
default_mounts: "{{cobald_slurm_mounts | default([])}}"
|
||||
cobald_mounts:
|
||||
- "~{{unpriv_user}}/cobald:/etc/cobald"
|
||||
# - /container/volumes/cobald:/etc/cobald:ro
|
||||
- "~{{unpriv_user}}/cobald/modules:/usr/local/src/cobaldmodules"
|
||||
- "~{{unpriv_user}}/cobald-src:/usr/local/src/cobald:ro"
|
||||
- "~{{unpriv_user}}/tardis-src:/usr/local/src/tardis:ro"
|
||||
networks:
|
||||
- name: "{{docker_network}}"
|
||||
state: started
|
||||
detach: True
|
||||
cleanup: True
|
||||
interactive: True
|
||||
# command: /bin/bash
|
||||
# python3 -m cobald.daemon /etc/cobald/config.yaml
|
||||
command: python3 -m cobald.daemon /etc/cobald/config.yaml
|
||||
|
@@ -1,4 +1,4 @@
|
||||
FROM docker.io/library/centos:7
|
||||
FROM {{ cobald_docker_base_image | default("docker.io/library/centos:7") }}
|
||||
|
||||
RUN yum update -y && \
|
||||
yum install -y python3 git && pip3 install --upgrade pip && \
|
||||
@@ -41,11 +41,20 @@ VOLUME /usr/local/src/cobaldmodules
|
||||
|
||||
VOLUME /etc/cobald
|
||||
|
||||
COPY cobald-entrypoint.sh /usr/local/sbin/docker-entrypoint.sh
|
||||
RUN mkdir -p /usr/local/lib/entrypoints.d/
|
||||
|
||||
RUN chmod 755 /usr/local/sbin/docker-entrypoint.sh
|
||||
COPY init-cobaldmodules.sh /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh
|
||||
|
||||
ENTRYPOINT [ "/usr/local/sbin/docker-entrypoint.sh" ]
|
||||
RUN chmod 755 /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh
|
||||
|
||||
# Install the docker-init start script that launches the cobald daemon.
# The directory is created first because it may not exist in the configured
# base image; printf is used instead of `echo -e` so the result does not depend
# on the shell's echo implementation, and `>` keeps the file content
# deterministic even if the base image already ships a file of that name.
RUN mkdir -p /etc/docker-init.d && printf '#!/bin/sh\npython3 -m cobald.daemon /etc/cobald/config.yaml\n' > /etc/docker-init.d/70-cobald && chmod 755 /etc/docker-init.d/70-cobald
|
||||
|
||||
{% if cobald_docker_default_command | default(True) -%}
|
||||
COPY cobald-entrypoint.sh /usr/local/sbin/cobald-entrypoint.sh
|
||||
|
||||
RUN chmod 755 /usr/local/sbin/cobald-entrypoint.sh
|
||||
|
||||
ENTRYPOINT [ "/usr/local/sbin/cobald-entrypoint.sh" ]
|
||||
|
||||
RUN yum -y install iproute &&\
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
@@ -54,4 +63,6 @@ USER cobald
|
||||
|
||||
STOPSIGNAL SIGINT
|
||||
|
||||
CMD "python3 -m cobald.daemon /etc/cobald/config.yaml"
|
||||
# CMD "python3 -m cobald.daemon /etc/cobald/config.yaml"
|
||||
# Default command: run the cobald start script. The name must match the file
# created in /etc/docker-init.d earlier in this Dockerfile (70-cobald).
CMD /etc/docker-init.d/70-cobald
|
||||
{%- endif %}
|
12
roles/cobald/vars/cobald-slurm.yml
Normal file
12
roles/cobald/vars/cobald-slurm.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
cobald_image_tag: slurm
|
||||
cobald_docker_base_image: "{{slurm.base_image}}"
|
||||
cobald_docker_default_command: False
|
||||
cobald_docker_network: "{{slurm.network}}"
|
||||
cobald_domainname: "{{slurm.domain}}"
|
||||
cobald_slurm_mounts: "{{slurm.mounts}}"
|
||||
#- /container/volumes/slurm/:/etc/slurm/:rw
|
||||
##- "{{slurm_cfg_path | mandatory}}:/etc/slurm/:rw"
|
||||
#- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
||||
## - "{{slurm_munge_path | mandatory}}:/etc/munge/munge.key:rw"
|
||||
#- slurm-shared:/shared/:rw
|
||||
## - "{{slurm_shared_path | mandatory}}:{{slurm_shared_target | default('/shared')}}:rw"
|
1
roles/cobald/vars/main.yml
Normal file
1
roles/cobald/vars/main.yml
Normal file
@@ -0,0 +1 @@
|
||||
cobald_docker_network: "{{docker_network}}"
|
11
roles/cobald_facts/tasks/main.yml
Normal file
11
roles/cobald_facts/tasks/main.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
- block:
|
||||
- docker_container_info:
|
||||
name: "{{ container_name | mandatory }}"
|
||||
register: cobald_container_info
|
||||
|
||||
- set_fact:
|
||||
cobald_container_hostname: |-
|
||||
{{cobald_container_info.container.Config.Hostname | default('cobald-'+
|
||||
lookup('password', '/dev/null chars=ascii_lowercase length=6')) }}
|
||||
when: cobald_container_hostname is not defined
|
||||
|
@@ -3,3 +3,6 @@ slurm_user: slurm
|
||||
slurm_log_path_ctld: /var/log/slurm/slurmctld.log
|
||||
slurm_log_path_d: /var/log/slurm/slurmd.log
|
||||
slurm_log_path_sched: /var/log/slurm/slurmsched.log
|
||||
slurm_prefix: slurm
|
||||
slurm_domain: slurm.local
|
||||
docker_network: slurm
|
||||
|
18
roles/slurm/files/docker-init
Normal file
18
roles/slurm/files/docker-init
Normal file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
|
||||
function trp_term(){
|
||||
echo pkill -P $pids
|
||||
for j in $pids ; do
|
||||
pkill -P $j
|
||||
kill -SIGTERM $j
|
||||
done
|
||||
}
|
||||
trap trp_term SIGINT SIGTERM
|
||||
pids=""
|
||||
for i in /etc/docker-init.d/* ; do
|
||||
[ ! -f $i ] && break
|
||||
$i &
|
||||
pids="$pids $!"
|
||||
done
|
||||
wait $pids
|
||||
|
||||
# TODO: call start scripts like "foo.sh start" and "foo.sh stop" to avoid pkill
|
@@ -1,32 +1,11 @@
|
||||
FROM docker.io/library/centos:7 as base
|
||||
|
||||
RUN yum install -y epel-release && \
|
||||
yum install -y slurm && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
||||
|
||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
||||
chmod 755 /usr/local/sbin/entrypoint.sh
|
||||
|
||||
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
|
||||
|
||||
ARG slurmuser=slurm
|
||||
ENV slurmuser=${slurmuser}
|
||||
|
||||
RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
||||
slurm-setuser -u $slurmuser -g $slurmuser -y
|
||||
FROM slurm:base
|
||||
|
||||
RUN yum install -y slurm-slurmctld && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
COPY start-scripts/20-slurmctld /etc/docker-init.d/20-slurmctld
|
||||
RUN chmod 755 /etc/docker-init.d/20-slurmctld
|
||||
|
||||
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
||||
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
||||
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
||||
|
||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
||||
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
|
||||
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
||||
|
@@ -1,32 +1,11 @@
|
||||
FROM docker.io/library/centos:7
|
||||
|
||||
RUN yum install -y epel-release && \
|
||||
yum install -y slurm && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
||||
|
||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
||||
chmod 755 /usr/local/sbin/entrypoint.sh
|
||||
|
||||
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
|
||||
|
||||
ARG slurmuser=slurm
|
||||
ENV slurmuser=${slurmuser}
|
||||
|
||||
RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
||||
slurm-setuser -u $slurmuser -g $slurmuser -y
|
||||
FROM slurm:base
|
||||
|
||||
RUN yum install -y slurm-slurmd && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
COPY start-scripts/30-slurmd /etc/docker-init.d/30-slurmd
|
||||
RUN chmod 755 /etc/docker-init.d/30-slurmd
|
||||
|
||||
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
||||
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
||||
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
||||
|
||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
||||
slurmd -D 2>/dev/null 1>/dev/null & \
|
||||
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
||||
|
4
roles/slurm/files/start-scripts/20-slurmctld
Normal file
4
roles/slurm/files/start-scripts/20-slurmctld
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/sh
|
||||
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null &
|
||||
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
|
||||
|
4
roles/slurm/files/start-scripts/30-slurmd
Normal file
4
roles/slurm/files/start-scripts/30-slurmd
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/sh
|
||||
slurmd -D 2>/dev/null 1>/dev/null &
|
||||
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
|
||||
|
@@ -1,3 +1,3 @@
|
||||
- name: reconfigure slurm
|
||||
command:
|
||||
cmd: docker container exec -it slurm-ctl scontrol reconfigure
|
||||
shell:
|
||||
# Try a live reconfigure first; only if that fails, restart the controller
# container and retry. The fallback is grouped explicitly because `||` and `&&`
# have equal precedence in the shell and would otherwise re-run the
# reconfigure even after the first attempt succeeded.
cmd: "docker container exec -it {{slurm_prefix}}-ctl scontrol reconfigure || { docker container restart {{slurm_prefix}}-ctl && docker container exec -it {{slurm_prefix}}-ctl scontrol reconfigure; }"
|
||||
|
@@ -1,31 +1,50 @@
|
||||
- file:
|
||||
path: "/container/docker-images/{{item}}"
|
||||
path: "/container/docker-images/slurm"
|
||||
state: directory
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
|
||||
- copy:
|
||||
src: "{{item}}.Dockerfile"
|
||||
dest: "/container/docker-images/{{item}}/Dockerfile"
|
||||
- copy: # FIXME: template
|
||||
src: "{{image.name}}.Dockerfile"
|
||||
dest: "/container/docker-images/slurm/{{image.name}}.Dockerfile"
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
register: slurm_cp_dockerfile
|
||||
|
||||
- copy:
|
||||
src: "entrypoint.sh"
|
||||
dest: "/container/docker-images/{{item}}/entrypoint.sh"
|
||||
- name: copy entrypoint and docker-init
|
||||
copy: # FIXME: swap out
|
||||
src: "{{item}}"
|
||||
dest: "/container/docker-images/slurm/{{item}}"
|
||||
owner: root
|
||||
group: root
|
||||
mode: u=rwx,g=rx,o=rx
|
||||
loop:
|
||||
- entrypoint.sh
|
||||
- docker-init
|
||||
register: slurm_cp_entrypt
|
||||
|
||||
- name: copy startup scripts
|
||||
copy:
|
||||
src: "start-scripts/"
|
||||
dest: "/container/docker-images/slurm/start-scripts/"
|
||||
owner: root
|
||||
group: root
|
||||
mode: u=rwx,g=rx,o=rx
|
||||
register: slurm_cp_stscrs
|
||||
|
||||
- set_fact:
|
||||
slurm_image_files_changed: "{{ (slurm_image_files_changed | default(False))
|
||||
or slurm_cp_entrypt.changed or slurm_cp_stscrs.changed }}"
|
||||
|
||||
- docker_image:
|
||||
name: "slurm-{{item}}"
|
||||
name: "slurm"
|
||||
tag: "{{image.name}}"
|
||||
# pull: False
|
||||
build:
|
||||
pull: False
|
||||
path: "/container/docker-images/{{item}}"
|
||||
# target: "{{item}}" # unsupported on old docker-py versions as in el7
|
||||
path: "/container/docker-images/slurm/"
|
||||
dockerfile: "{{image.name}}.Dockerfile"
|
||||
# target: "{{image.name}}" # unsupported on old docker-py version as in el7
|
||||
source: build
|
||||
force_source: "{{slurm_cp_dockerfile.changed or slurm_cp_entrypt.changed}}"
|
||||
force_source: "{{slurm_cp_dockerfile.changed or slurm_image_files_changed}}"
|
||||
|
||||
|
@@ -3,10 +3,22 @@
|
||||
name: [ slurm, slurm-doc ]
|
||||
state: present
|
||||
|
||||
- include_tasks: dockerimage.yml
|
||||
loop:
|
||||
- slurmctld
|
||||
- slurmd
|
||||
- include_role:
|
||||
name: slurm_dockerimage
|
||||
loop: # FIXME: default(omit)!
|
||||
- name: slurmctld
|
||||
dockerfile: "{{ lookup('file', 'slurmctld.Dockerfile') }}"
|
||||
files:
|
||||
- dest: start-scripts/20-slurmctld
|
||||
content: "{{ lookup('file', 'start-scripts/20-slurmctld') }}"
|
||||
- name: slurmd
|
||||
dockerfile: "{{ lookup('file', 'slurmd.Dockerfile') }}"
|
||||
files:
|
||||
- dest: start-scripts/30-slurmd
|
||||
content: "{{ lookup('file', 'start-scripts/30-slurmd') }}"
|
||||
loop_control:
|
||||
loop_var: image
|
||||
label: "{{ image.name }}"
|
||||
|
||||
- name: generate munge key
|
||||
shell:
|
||||
@@ -47,12 +59,14 @@
|
||||
loop:
|
||||
- slurm.conf
|
||||
- cgroup.conf
|
||||
vars:
|
||||
alloc_nodes: "{{ [ slurm_prefix+'-submit1' ] + extra_nodes | default([])}}"
|
||||
notify: reconfigure slurm
|
||||
tags: [ slurm-config ]
|
||||
|
||||
- name: "create docker network to make service discovery work"
|
||||
docker_network:
|
||||
name: slurm
|
||||
name: "{{ docker_network }}"
|
||||
state: present
|
||||
|
||||
- name: "create docker volume for shared access between nodes"
|
||||
@@ -61,45 +75,62 @@
|
||||
state: present
|
||||
|
||||
- set_fact:
|
||||
slurm_nodes: # default nodes: controller and submit machine
|
||||
slurm_nodes_std: # default nodes: controller and submit machine
|
||||
- machine: ctl
|
||||
image: slurm-slurmctld
|
||||
image: slurm:slurmctld
|
||||
exposed_ports: [ "6817:6817/tcp" ]
|
||||
- machine: submit1
|
||||
image: slurm-slurmd
|
||||
image: slurm:slurmd
|
||||
extra_mounts:
|
||||
- "/home/{{unpriv_user}}/job3/:/mnt/:rw"
|
||||
slurm_nodes_exec: | # extend range to execute nodes list
|
||||
{% set slurm_nodes_exec = slurm_nodes_exec | default([]) %}
|
||||
{% for i in range(1, num_nodes+1) -%}
|
||||
{% set _ = slurm_nodes_exec.extend([
|
||||
{'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%}
|
||||
{%- endfor %}
|
||||
{{ slurm_nodes_exec }}
|
||||
slurm_default_mounts:
|
||||
- /container/volumes/slurm/:/etc/slurm/:rw
|
||||
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
||||
- slurm-shared:/shared/:rw
|
||||
slurm_network: "{{docker_network}}"
|
||||
tags: [ slurm-config ]
|
||||
|
||||
|
||||
# TODO: reserve some address using docker_network_info and assign as aux
|
||||
# address to enable slurmctld to get a static address in order to be
|
||||
# reachable from slurm running on docker host to enable submitting jobs.
|
||||
|
||||
- name: run slurm docker containers
|
||||
docker_container:
|
||||
name: "slurm-{{item.machine}}"
|
||||
hostname: "slurm-{{item.machine}}"
|
||||
domainname: "slurm.local"
|
||||
volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
|
||||
name: "{{ slurm_prefix }}-{{ item.machine }}"
|
||||
hostname: "{{ slurm_prefix }}-{{ item.machine }}"
|
||||
domainname: "{{ slurm_domain }}"
|
||||
volumes: "{{ slurm_default_mounts + ( item.extra_mounts | default([]) ) }}"
|
||||
ports: "{{ item.exposed_ports | default([]) }}"
|
||||
networks:
|
||||
- name: "slurm"
|
||||
- name: "{{ slurm_network }}"
|
||||
env:
|
||||
slurmuser: "{{slurm_user}}"
|
||||
image: "{{item.image}}"
|
||||
slurmuser: "{{ slurm_user }}"
|
||||
image: "{{ item.image }}"
|
||||
state: started
|
||||
detach: True
|
||||
cleanup: True
|
||||
privileged: "{{ container_privileged | bool }}"
|
||||
networks_cli_compatible: True
|
||||
interactive: True
|
||||
vars:
|
||||
default_mounts:
|
||||
- /container/volumes/slurm/:/etc/slurm/:rw
|
||||
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
||||
- slurm-shared:/shared/:rw
|
||||
slurm_nodes_all: | # add execute nodes
|
||||
{% for i in range(1, 4) -%}
|
||||
{% set _ = slurm_nodes.extend([
|
||||
{'machine':'exec%s'|format(i), 'image': 'slurm-slurmd'}]) -%}
|
||||
{%- endfor %}
|
||||
{{ slurm_nodes }}
|
||||
loop: "{{slurm_nodes_all}}"
|
||||
slurm_nodes_all: "{{ slurm_nodes_exec + slurm_nodes_std }}"
|
||||
loop: "{{ slurm_nodes_all }}"
|
||||
loop_control:
|
||||
label: "{{slurm_prefix}}-{{ item.machine }}"
|
||||
tags: [ slurm-config ]
|
||||
|
||||
- name: set facts to be used by other modules
|
||||
set_fact:
|
||||
slurm:
|
||||
user: "{{slurm_user}}"
|
||||
domain: "{{slurm_domain}}"
|
||||
base_image: "slurm:base"
|
||||
mounts: "{{slurm_default_mounts}}"
|
||||
network: "{{docker_network}}"
|
||||
|
@@ -164,5 +164,7 @@ SlurmSchedLogFile={{slurm_log_path_sched}}
|
||||
#
|
||||
# COMPUTE NODES
|
||||
# Execute node names must match the container hostnames, which are built from slurm_prefix.
NodeName={{slurm_prefix}}-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN
|
||||
NodeName=slurm-submit1 CPUs=1 State=UNKNOWN
|
||||
PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] AllocNodes=slurm-submit1 Default=YES MaxTime=INFINITE State=UP
|
||||
{% for i in alloc_nodes -%}
|
||||
NodeName={{i}} State=UNKNOWN
|
||||
{% endfor %}
|
||||
PartitionName=debug Nodes={{slurm_prefix}}-exec[1-{{num_nodes}}] AllocNodes={{alloc_nodes | join(',')}} Default=YES MaxTime=INFINITE State=UP
|
||||
|
@@ -5,5 +5,3 @@ if [ -f "/etc/munge/munge.key" ] ; then
|
||||
chown munge:munge /etc/munge/munge.key
|
||||
chmod 600 /etc/munge/munge.key
|
||||
fi
|
||||
|
||||
exec "$@"
|
8
roles/slurm_dockerimage/files/entrypoint.sh
Normal file
8
roles/slurm_dockerimage/files/entrypoint.sh
Normal file
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
for i in /usr/local/lib/entrypoints.d/* ; do
|
||||
# Run each entrypoint fragment in order; stop at the first entry that is not a
# regular file or whose script fails. The path is quoted so names containing
# whitespace or glob characters are tested and executed intact.
[ -f "$i" ] && /bin/sh "$i" || break
|
||||
done
|
||||
|
||||
exec "${@:-/bin/bash}"
|
31
roles/slurm_dockerimage/files/slurm-base.Dockerfile
Normal file
31
roles/slurm_dockerimage/files/slurm-base.Dockerfile
Normal file
@@ -0,0 +1,31 @@
|
||||
FROM docker.io/library/centos:7
|
||||
|
||||
RUN yum install -y epel-release && \
|
||||
yum install -y slurm && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
|
||||
RUN mkdir -p /usr/local/lib/entrypoints.d/
|
||||
|
||||
COPY --chown=root:root entry-munge.sh /usr/local/lib/entrypoints.d/10-munge.sh
|
||||
COPY --chown=root:root entrypoint.sh /usr/local/sbin/entrypoint.sh
|
||||
|
||||
RUN chmod 755 /usr/local/lib/entrypoints.d/10-munge.sh && \
|
||||
chmod 755 /usr/local/sbin/entrypoint.sh
|
||||
|
||||
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
|
||||
|
||||
ARG slurmuser=slurm
|
||||
ENV slurmuser=${slurmuser}
|
||||
|
||||
RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
||||
slurm-setuser -u $slurmuser -g $slurmuser -y
|
||||
|
||||
COPY docker-init /usr/local/sbin/docker-init
|
||||
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
|
||||
COPY start-scripts/10-munge /etc/docker-init.d/10-munge
|
||||
RUN chmod 755 /etc/docker-init.d/10-munge
|
||||
|
||||
CMD /usr/local/sbin/docker-init
|
2
roles/slurm_dockerimage/files/start-scripts/10-munge
Normal file
2
roles/slurm_dockerimage/files/start-scripts/10-munge
Normal file
@@ -0,0 +1,2 @@
|
||||
#!/bin/sh
|
||||
exec su -s /bin/sh -c "munged -F" munge
|
40
roles/slurm_dockerimage/tasks/dockerimage.yml
Normal file
40
roles/slurm_dockerimage/tasks/dockerimage.yml
Normal file
@@ -0,0 +1,40 @@
|
||||
- name: create directories for docker image build
|
||||
file:
|
||||
path: "/container/docker-images/slurm-{{image.name}}/{{item}}"
|
||||
state: directory
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
# Build the list of directories to create: the image root ('') plus every
# distinct, non-empty parent directory of the files to be copied.
# default([]) keeps the loop working for images that declare no extra files,
# matching the "copy requisite files" task which already treats image.files
# as optional.
loop: "{{ [''] + (image.files | default([]) | map(attribute='dest') | map('dirname') | unique | select | list) }}"
|
||||
|
||||
- name: copy Dockerfile
|
||||
copy:
|
||||
src: "{{image.name}}.Dockerfile"
|
||||
dest: "/container/docker-images/slurm-{{image.name}}/Dockerfile"
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
register: slurm_cp_dockerfile
|
||||
|
||||
- name: copy requisite files
|
||||
copy:
|
||||
content: "{{ item.content }}"
|
||||
dest: "/container/docker-images/slurm-{{image.name}}/{{item.dest}}"
|
||||
owner: root
|
||||
group: root
|
||||
mode: u=rwx,g=rx,o=rx
|
||||
loop: "{{ image.files | default([]) }}"
|
||||
loop_control:
|
||||
label: "{{ item.dest }}"
|
||||
register: slurm_cp_files
|
||||
|
||||
- docker_image:
|
||||
name: "slurm"
|
||||
tag: "{{image.name}}"
|
||||
# pull: False
|
||||
build:
|
||||
pull: False
|
||||
path: "/container/docker-images/slurm-{{image.name}}/"
|
||||
source: build
|
||||
force_source: "{{slurm_cp_dockerfile.changed or
|
||||
slurm_cp_files.changed or
|
||||
slurm_baseimg_build_chg }}"
|
46
roles/slurm_dockerimage/tasks/main.yml
Normal file
46
roles/slurm_dockerimage/tasks/main.yml
Normal file
@@ -0,0 +1,46 @@
|
||||
- file:
|
||||
path: "/container/docker-images/slurm-base/start-scripts"
|
||||
state: directory
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
|
||||
# - name: copy Dockerfile, entrypoint, docker-init and munge startup
|
||||
- name: copy slurm base image requisite files
|
||||
copy: # FIXME: swap out
|
||||
src: "{{item.file}}"
|
||||
dest: "/container/docker-images/slurm-base/{{item.file}}"
|
||||
owner: "{{unpriv_user}}"
|
||||
group: docker
|
||||
mode: "{{ item.perms | default('u=rwx,g=rx,o=rx') }}"
|
||||
loop:
|
||||
- file: slurm-base.Dockerfile
|
||||
perms: u=rw,g=r,o=r
|
||||
- file: entrypoint.sh
|
||||
- file: entry-munge.sh
|
||||
- file: docker-init
|
||||
- file: start-scripts/10-munge
|
||||
when: not (slurm_baseimg_build_chg | default(False))
|
||||
register: slurm_baseimg_copy
|
||||
|
||||
- name: build base image
|
||||
docker_image:
|
||||
name: "slurm"
|
||||
tag: "base"
|
||||
# pull: False
|
||||
build:
|
||||
pull: False
|
||||
path: "/container/docker-images/slurm-base/"
|
||||
dockerfile: "slurm-base.Dockerfile"
|
||||
# target: "{{image.name}}" # unsupported on old docker-py version as in el7
|
||||
source: build
|
||||
force_source: "{{slurm_baseimg_copy.changed}}"
|
||||
# when: run only once but keep changed state
|
||||
when: not (slurm_baseimg_build_chg | default(False))
|
||||
register: slurm_baseimg_build
|
||||
|
||||
- set_fact:
|
||||
slurm_baseimg_build_chg:
|
||||
"{{(slurm_baseimg_build_chg | default(False)) or
|
||||
slurm_baseimg_build.changed}}"
|
||||
|
||||
- include_tasks: dockerimage.yml
|
Reference in New Issue
Block a user