Compare commits
41 Commits
ddc6c2bb4d
...
cobaldtard
Author | SHA1 | Date | |
---|---|---|---|
4c63f2a825
|
|||
51390bb321
|
|||
52022a3013
|
|||
574d2fcb4e
|
|||
2919c98d5f
|
|||
f73fef1473
|
|||
8bc2f717e0
|
|||
d88761ca7d
|
|||
3be5025442
|
|||
4c4c4da79d
|
|||
1a952a4e7a
|
|||
74a760cf98
|
|||
cd7dea8fda
|
|||
7e767c3716
|
|||
a61d08d118
|
|||
188a9215a9
|
|||
9499ce49ae
|
|||
9237d736d8
|
|||
e979ea4d6e
|
|||
c7e931f29e
|
|||
a73f9ad6ad
|
|||
c35dc25c39
|
|||
1f4dfe1821
|
|||
78850d4636
|
|||
f83801cb62
|
|||
e78e184375
|
|||
02e87d7c40
|
|||
4450c9bb65
|
|||
6eb6984d6a
|
|||
cc43a39ea3 | |||
962d9b5ac9
|
|||
e81fb5d445
|
|||
73945b6cb9
|
|||
089ea914b6 | |||
dd1baa4aef
|
|||
ea3195a93c
|
|||
aef1499e65
|
|||
c7203f58ff
|
|||
2e0d83cca1
|
|||
35882ca1a9 | |||
4e7f33338e
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
|||||||
.*.swp
|
.*.swp
|
||||||
*.retry
|
*.retry
|
||||||
|
vars_auth.yml
|
||||||
|
27
base.yml
Normal file
27
base.yml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
---
|
||||||
|
- hosts: all
|
||||||
|
tasks:
|
||||||
|
- name: "install epel repo" # for htop etc.
|
||||||
|
yum:
|
||||||
|
name: epel-release
|
||||||
|
state: present
|
||||||
|
|
||||||
|
- name: "install tools"
|
||||||
|
yum:
|
||||||
|
name: [ vim-enhanced, htop, screen, bind-utils, nmap-ncat, net-tools ]
|
||||||
|
state: present
|
||||||
|
|
||||||
|
- name: "screenrc native scrolling in tmux"
|
||||||
|
copy:
|
||||||
|
content: "termcapinfo xterm* ti@:te@\ntermcapinfo screen* ti@:te@\n"
|
||||||
|
dest: "{{item}}"
|
||||||
|
with_items:
|
||||||
|
- "~{{unpriv_user}}/.screenrc"
|
||||||
|
- "~root/.screenrc"
|
||||||
|
|
||||||
|
- name: "install ssh-key"
|
||||||
|
authorized_key:
|
||||||
|
user: "{{cfg_unpriv_user}}"
|
||||||
|
key: "{{cfg_ssh_key}}"
|
||||||
|
state: present
|
||||||
|
|
61
cobald.yml
Normal file
61
cobald.yml
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
---
|
||||||
|
- hosts: slurm, cobald
|
||||||
|
vars:
|
||||||
|
container_privileged: True
|
||||||
|
slurm_num_nodes: 10
|
||||||
|
tasks:
|
||||||
|
- name: "setup docker"
|
||||||
|
import_role: name=docker
|
||||||
|
tags: docker
|
||||||
|
|
||||||
|
- name: "get facts from existing cobald instance (i.e. hostname)"
|
||||||
|
include_role:
|
||||||
|
name: cobald
|
||||||
|
tasks_from: facts
|
||||||
|
apply:
|
||||||
|
tags: slurm, cobald, slurm-config
|
||||||
|
tags: slurm, cobald, slurm-config
|
||||||
|
vars:
|
||||||
|
container_name: cobald
|
||||||
|
|
||||||
|
- name: "setup slurm test environment in docker containers"
|
||||||
|
include_role:
|
||||||
|
name: slurm
|
||||||
|
apply:
|
||||||
|
tags: slurm
|
||||||
|
vars:
|
||||||
|
slurm_user: slurm # or root
|
||||||
|
slurm_user_accounts:
|
||||||
|
- name: cobald
|
||||||
|
dir: /var/lib/cobald
|
||||||
|
num_nodes: "{{slurm_num_nodes}}"
|
||||||
|
extra_nodes:
|
||||||
|
- name: cobald
|
||||||
|
hostname: "{{cobald_container_hostname}}" # from cobald/facts.yml above
|
||||||
|
# hostname is used as NodeHostname, which is used by Slurm's "networking
|
||||||
|
# code" (https://bugs.schedmd.com/show_bug.cgi?id=8615).
|
||||||
|
# It works either way around, but one of NodeName or NodeHostname has
|
||||||
|
# to match the container name (-n flag, not --hostname) since when
|
||||||
|
# submitting tasks to the slurm controller, it matches access
|
||||||
|
# permissions against a reverse lookup of the submitting ip address.
|
||||||
|
# Docker always and unconfigurably resolves the container ip in any
|
||||||
|
# network to containername.netname, where containername is the
|
||||||
|
# container's runtime name (not the hostname supplied!) and netname is
|
||||||
|
# the network name in host environment. We should run our own dns...
|
||||||
|
docker_network: slurm
|
||||||
|
slurm_hostsystem_cluster_access: True
|
||||||
|
when: '"slurm" in group_names'
|
||||||
|
tags: slurm, cobald, influxdb, slurm-config
|
||||||
|
# tags: cobald requires some slurm facts, so cobald tag is included here
|
||||||
|
|
||||||
|
- name: "install cobald"
|
||||||
|
include_role:
|
||||||
|
name: cobald
|
||||||
|
apply:
|
||||||
|
tags: cobald
|
||||||
|
vars:
|
||||||
|
cobald_slurm: True
|
||||||
|
container_name: cobald
|
||||||
|
# docker_network: slurm # overridden by vars/slurm.yml
|
||||||
|
when: '"cobald" in group_names'
|
||||||
|
tags: cobald, influxdb, singularity
|
32
htcondor.yml
Normal file
32
htcondor.yml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
---
|
||||||
|
- hosts: htcondor
|
||||||
|
tasks:
|
||||||
|
- name: "install htcondor repo"
|
||||||
|
yum:
|
||||||
|
name: https://research.cs.wisc.edu/htcondor/repo/8.9/htcondor-release-current.el7.noarch.rpm
|
||||||
|
state: present
|
||||||
|
tags: htcondor
|
||||||
|
|
||||||
|
- name: "install htcondor software "
|
||||||
|
yum:
|
||||||
|
name: htcondor-ce
|
||||||
|
state: present
|
||||||
|
tags: htcondor
|
||||||
|
|
||||||
|
- name: "remove minicondor configuration"
|
||||||
|
yum:
|
||||||
|
name: minicondor
|
||||||
|
state: absent
|
||||||
|
tags: htcondor
|
||||||
|
|
||||||
|
- name: "setup singularity"
|
||||||
|
import_role: name="singularity"
|
||||||
|
tags: singularity
|
||||||
|
|
||||||
|
- name: "setup docker"
|
||||||
|
import_role: name=docker
|
||||||
|
tags: docker
|
||||||
|
|
||||||
|
- name: "setup htcondor test environment in docker containers"
|
||||||
|
import_role: name=docker-htcondor
|
||||||
|
tags: htcondor-containered, htcondor
|
4
install.sh
Executable file
4
install.sh
Executable file
@@ -0,0 +1,4 @@
|
|||||||
|
mkdir -p ./collections/ansible_collections/community
|
||||||
|
# git clone --depth=1 -b 1.2.1 https://github.com/ansible-collections/community.grafana.git ./collections/ansible_collections/community/grafana
|
||||||
|
git clone --depth=1 -b 1.2.1-extended https://github.com/ansible_community.grafana.git ./collections/ansible_collections/community/grafana
|
||||||
|
|
10
inv.yml
10
inv.yml
@@ -6,6 +6,14 @@ all:
|
|||||||
ssh_args: -o ControlMaster=auto -o ControlPersist=60s
|
ssh_args: -o ControlMaster=auto -o ControlPersist=60s
|
||||||
# ansible_host: 192.168.122.139
|
# ansible_host: 192.168.122.139
|
||||||
unpriv_user: thoto
|
unpriv_user: thoto
|
||||||
|
cfg_unpriv_user: thoto
|
||||||
|
ed-c7-2:
|
||||||
|
ansible_user: root
|
||||||
|
ansible_host: ed-c7-2.virt.uller.thoto.net
|
||||||
|
# ansible_host: 192.168.123.60 # + jumphost
|
||||||
|
ssh_args: -o ControlMaster=auto -o ControlPersist=60s
|
||||||
|
unpriv_user: thoto
|
||||||
|
cfg_unpriv_user: thoto
|
||||||
children:
|
children:
|
||||||
htcondor:
|
htcondor:
|
||||||
hosts:
|
hosts:
|
||||||
@@ -13,6 +21,8 @@ all:
|
|||||||
slurm:
|
slurm:
|
||||||
hosts:
|
hosts:
|
||||||
ed-c7-1:
|
ed-c7-1:
|
||||||
|
ed-c7-2:
|
||||||
cobald:
|
cobald:
|
||||||
hosts:
|
hosts:
|
||||||
ed-c7-1:
|
ed-c7-1:
|
||||||
|
ed-c7-2:
|
||||||
|
66
play.yml
66
play.yml
@@ -1,62 +1,10 @@
|
|||||||
---
|
---
|
||||||
- hosts: all
|
- name: base setup
|
||||||
tasks:
|
import_playbook: base.yml
|
||||||
- name: "install tools"
|
|
||||||
yum:
|
|
||||||
name: [ vim-enhanced, htop, screen, bind-utils, nmap-ncat, net-tools ]
|
|
||||||
state: present
|
|
||||||
|
|
||||||
- hosts: htcondor
|
- name: setup htcondor
|
||||||
pre_tasks:
|
import_playbook: htcondor.yml
|
||||||
- name: "install htcondor repo"
|
when: '"htcondor" in group_names'
|
||||||
yum:
|
|
||||||
name: https://research.cs.wisc.edu/htcondor/repo/8.9/htcondor-release-current.el7.noarch.rpm
|
|
||||||
state: present
|
|
||||||
tags: htcondor
|
|
||||||
|
|
||||||
- name: "install htcondor software "
|
- name: setup slurm and cobald
|
||||||
yum:
|
import_playbook: cobald.yml
|
||||||
name: htcondor-ce
|
|
||||||
state: present
|
|
||||||
tags: htcondor
|
|
||||||
|
|
||||||
- name: "remove minicondor configuration"
|
|
||||||
yum:
|
|
||||||
name: minicondor
|
|
||||||
state: absent
|
|
||||||
tags: htcondor
|
|
||||||
|
|
||||||
- name: "setup singularity"
|
|
||||||
import_tasks: "singularity.yml"
|
|
||||||
tags: singularity
|
|
||||||
|
|
||||||
roles:
|
|
||||||
- name: "setup docker"
|
|
||||||
role: docker
|
|
||||||
tags: docker
|
|
||||||
|
|
||||||
- name: "setup htcondor test environment in docker containers"
|
|
||||||
role: docker-htcondor
|
|
||||||
tags:
|
|
||||||
- htcondor-containered
|
|
||||||
- htcondor
|
|
||||||
|
|
||||||
- hosts: slurm
|
|
||||||
vars:
|
|
||||||
container_privileged: True
|
|
||||||
num_nodes: 3
|
|
||||||
roles:
|
|
||||||
- name: "setup docker"
|
|
||||||
role: docker
|
|
||||||
tags: docker
|
|
||||||
- name: "setup slurm test environment in docker containers"
|
|
||||||
role: slurm
|
|
||||||
vars:
|
|
||||||
slurm_user: slurm # or root
|
|
||||||
tags: slurm
|
|
||||||
|
|
||||||
- hosts: cobald
|
|
||||||
roles:
|
|
||||||
- name: "install cobald"
|
|
||||||
role: cobald
|
|
||||||
tags: cobald
|
|
||||||
|
6
roles/cobald/defaults/main.yml
Normal file
6
roles/cobald/defaults/main.yml
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
cobald_domainname: cobald.local
|
||||||
|
influx_admin_user: my-user
|
||||||
|
influx_admin_pw: my-password
|
||||||
|
influx_org: my-org
|
||||||
|
influx_pubport: 28086
|
||||||
|
influx_bucket: batleth
|
3
roles/cobald/files/28-sync-container-slurmd
Normal file
3
roles/cobald/files/28-sync-container-slurmd
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
[ /slurm-singimage/slurmd.sif -nt /shared/slurmd.sif ] && \
|
||||||
|
cp /slurm-singimage/slurmd.sif /shared/slurmd.sif
|
3
roles/cobald/files/31-slurmd-configless
Normal file
3
roles/cobald/files/31-slurmd-configless
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
slurmd --conf-server ${slurmctld} -D -N ${nodename} 2>/dev/null 1>/dev/null &
|
||||||
|
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
|
11
roles/cobald/files/cgroup.conf.noautomount
Normal file
11
roles/cobald/files/cgroup.conf.noautomount
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
###
|
||||||
|
#
|
||||||
|
# Slurm cgroup support configuration file
|
||||||
|
#
|
||||||
|
# See man slurm.conf and man cgroup.conf for further
|
||||||
|
# information on cgroup configuration parameters
|
||||||
|
#--
|
||||||
|
CgroupAutomount=no
|
||||||
|
|
||||||
|
ConstrainCores=no
|
||||||
|
ConstrainRAMSpace=no
|
31
roles/cobald/files/cobald-config/config.yaml
Normal file
31
roles/cobald/files/cobald-config/config.yaml
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
---
|
||||||
|
pipeline:
|
||||||
|
- __type__: cobald.controller.linear.LinearController
|
||||||
|
low_utilisation: 0.9
|
||||||
|
high_allocation: 0.9
|
||||||
|
rate: 0.10
|
||||||
|
- !Limiter
|
||||||
|
minimum: 3
|
||||||
|
- !TelegrafPipelineMonitor
|
||||||
|
poll: True
|
||||||
|
- !TardisPoolFactory
|
||||||
|
configuration: /etc/cobald/tardis.yaml
|
||||||
|
logging:
|
||||||
|
version: 1
|
||||||
|
root:
|
||||||
|
level: DEBUG
|
||||||
|
handlers: [console, file]
|
||||||
|
handlers:
|
||||||
|
console:
|
||||||
|
class: logging.StreamHandler
|
||||||
|
formatter: test
|
||||||
|
level: DEBUG
|
||||||
|
stream: ext://sys.stderr
|
||||||
|
file:
|
||||||
|
class: logging.handlers.RotatingFileHandler
|
||||||
|
formatter: test
|
||||||
|
level: WARNING
|
||||||
|
filename: /var/log/cobald/cobald-tardis.log
|
||||||
|
formatters:
|
||||||
|
test:
|
||||||
|
format: " %(name)s %(message)s"
|
41
roles/cobald/files/cobald-config/tardis.yaml
Normal file
41
roles/cobald/files/cobald-config/tardis.yaml
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
Plugins:
|
||||||
|
SqliteRegistry:
|
||||||
|
db_file: /tmp/drone_registry.db
|
||||||
|
TelegrafMonitoring:
|
||||||
|
host: ed-telegraf
|
||||||
|
port: 8094
|
||||||
|
#BatchSystem:
|
||||||
|
# adapter: FakeBatchSystem
|
||||||
|
# allocation: 1.0
|
||||||
|
# utilisation: !PeriodicValue
|
||||||
|
# period: 60
|
||||||
|
# amplitude: 0.15
|
||||||
|
# offset: 0.80
|
||||||
|
## phase: 1.
|
||||||
|
# phase: 1.6
|
||||||
|
# machine_status: Available
|
||||||
|
BatchSystem:
|
||||||
|
adapter: Slurm
|
||||||
|
max_age: 0.1
|
||||||
|
options:
|
||||||
|
partition: cobald
|
||||||
|
Sites:
|
||||||
|
- name: slurmtest
|
||||||
|
adapter: Slurm
|
||||||
|
quota: 20
|
||||||
|
slurmtest:
|
||||||
|
# executor: ...
|
||||||
|
StatusUpdate: 0.1
|
||||||
|
MachineTypes:
|
||||||
|
- m1.a
|
||||||
|
MachineTypeConfiguration:
|
||||||
|
m1.a:
|
||||||
|
Walltime: 5
|
||||||
|
Partition: container
|
||||||
|
StartupCommand: /usr/local/bin/start-drone
|
||||||
|
# SubmitOptions: ...
|
||||||
|
MachineMetaData:
|
||||||
|
m1.a:
|
||||||
|
Cores: 3 # cores
|
||||||
|
Memory: 1 # GB
|
||||||
|
Disk: 4 # not passed
|
@@ -1,7 +1,7 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
[ -f /usr/local/lib/cobaldmodules/setup.py -a \
|
for i in /usr/local/lib/entrypoints.d/* ; do
|
||||||
-d /usr/local/lib/cobaldmodules/cobaldmodules ] && \
|
[ -f $i ] && /bin/sh $i || break
|
||||||
pip3 install --no-deps --editable /usr/local/lib/cobaldmodules
|
done
|
||||||
|
|
||||||
exec "${@:-/bin/bash}"
|
exec "${@:-/bin/bash}"
|
||||||
|
589
roles/cobald/files/grafana-dashboard.json
Normal file
589
roles/cobald/files/grafana-dashboard.json
Normal file
@@ -0,0 +1,589 @@
|
|||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"builtIn": 1,
|
||||||
|
"datasource": "-- Grafana --",
|
||||||
|
"enable": true,
|
||||||
|
"hide": true,
|
||||||
|
"iconColor": "rgba(0, 211, 255, 1)",
|
||||||
|
"name": "Annotations & Alerts",
|
||||||
|
"type": "dashboard"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"gnetId": null,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"id": 1,
|
||||||
|
"iteration": 1623317629899,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"datasource": null,
|
||||||
|
"gridPos": {
|
||||||
|
"h": 1,
|
||||||
|
"w": 24,
|
||||||
|
"x": 0,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"panels": [],
|
||||||
|
"title": "Row title",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "InfluxDB",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "none"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"fill": 1,
|
||||||
|
"fillGradient": 0,
|
||||||
|
"gridPos": {
|
||||||
|
"h": 9,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 1
|
||||||
|
},
|
||||||
|
"hiddenSeries": false,
|
||||||
|
"id": 2,
|
||||||
|
"interval": "1s",
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": true,
|
||||||
|
"min": true,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": true
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"maxDataPoints": 200,
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"options": {
|
||||||
|
"alertThreshold": true
|
||||||
|
},
|
||||||
|
"percentage": false,
|
||||||
|
"pluginVersion": "7.5.7",
|
||||||
|
"pointradius": 2,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "f_r = (r, accumulator) => ({\n _value: accumulator._value + (\n if r._value == \"AvailableState\" then 1\n else if r._value == \"DownState\" then -1\n else 0)\n })\n\nnodes = from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> keep(columns: [\"tardis_machine_name\", \"_time\"])\n |> sort(columns: [\"_time\"], desc: true)\n |> unique(column: \"tardis_machine_name\")\n// |> yield()\n\noffset = from(bucket: \"batleth\")\n |> range(start: 0, stop: v.timeRangeStart)\n |> filter(fn: (r) => r._field == \"state\")\n |> group(columns: [\"tardis_machine_name\", \"machine_type\"])\n |> reduce(fn: f_r, identity: {_value: 0})\n |> duplicate(column: \"_stop\", as: \"_time\")\n\nnew = from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._field == \"state\")\n// |> filter(fn: (r) => r.tardis_machine_name == \"${machine}\")\n |> group(columns: [\"tardis_machine_name\", \"machine_type\"])\n |> window(every: $__interval)\n |> reduce(fn: f_r, identity: {_value: 0})\n |> duplicate(column: \"_stop\", as: \"_time\")\n\nunion(tables: [offset, new])\n |> window(every: inf)\n |> cumulativeSum()\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> yield()\n",
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeRegions": [],
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "nodes running",
|
||||||
|
"tooltip": {
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "none",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"yaxis": {
|
||||||
|
"align": false,
|
||||||
|
"alignLevel": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "InfluxDB",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"fill": 1,
|
||||||
|
"fillGradient": 0,
|
||||||
|
"gridPos": {
|
||||||
|
"h": 9,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 1
|
||||||
|
},
|
||||||
|
"hiddenSeries": false,
|
||||||
|
"id": 8,
|
||||||
|
"interval": "1s",
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"maxDataPoints": null,
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"options": {
|
||||||
|
"alertThreshold": true
|
||||||
|
},
|
||||||
|
"percentage": false,
|
||||||
|
"pluginVersion": "7.5.7",
|
||||||
|
"pointradius": 2,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"tardis_pipeline\")\n |> filter(fn: (r) => r._field == \"demand\" or r._field == \"supply\")\n |> drop(columns: [\"host\"])\n |> aggregateWindow(every: $__interval, fn: mean)\n |> yield()",
|
||||||
|
"queryType": "randomWalk",
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeRegions": [],
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "pipeline demand/supply (mean)",
|
||||||
|
"tooltip": {
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"$$hashKey": "object:115",
|
||||||
|
"decimals": null,
|
||||||
|
"format": "short",
|
||||||
|
"label": "cpus",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$$hashKey": "object:116",
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"yaxis": {
|
||||||
|
"align": false,
|
||||||
|
"alignLevel": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "InfluxDB",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"fill": 1,
|
||||||
|
"fillGradient": 0,
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 10
|
||||||
|
},
|
||||||
|
"hiddenSeries": false,
|
||||||
|
"id": 6,
|
||||||
|
"interval": "1s",
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"options": {
|
||||||
|
"alertThreshold": true
|
||||||
|
},
|
||||||
|
"percentage": false,
|
||||||
|
"pluginVersion": "7.5.7",
|
||||||
|
"pointradius": 2,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "f = (r, accumulator) => ({\n _value: accumulator._value + (if r._value == \"AvailableState\" then 1 else if r._value == \"DownState\" then -1 else 0)\n })\n\nfrom(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._field == \"state\")\n |> group(columns: [\"tardis_machine_name\"])\n |> window(every: $__interval)\n |> reduce(fn: f, identity: {_value: 0})\n |> duplicate(column: \"_stop\", as: \"_time\")\n |> window(every: inf, timeColumn: \"_time\")\n |> yield()",
|
||||||
|
"queryType": "randomWalk",
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeRegions": [],
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "node fluctuation",
|
||||||
|
"tooltip": {
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"yaxis": {
|
||||||
|
"align": false,
|
||||||
|
"alignLevel": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "InfluxDB",
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"fill": 1,
|
||||||
|
"fillGradient": 0,
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 10
|
||||||
|
},
|
||||||
|
"hiddenSeries": false,
|
||||||
|
"id": 10,
|
||||||
|
"interval": "1s",
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"maxDataPoints": null,
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"options": {
|
||||||
|
"alertThreshold": true
|
||||||
|
},
|
||||||
|
"percentage": false,
|
||||||
|
"pluginVersion": "7.5.7",
|
||||||
|
"pointradius": 2,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$__interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"orderByTime": "ASC",
|
||||||
|
"policy": "default",
|
||||||
|
"query": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"tardis_pipeline\")\n |> filter(fn: (r) => r._field == \"utilisation\" or r._field == \"allocation\")\n |> keep(columns: [\"_time\", \"_measurement\", \"_field\", \"_value\", \"tardis_machine_name\"])\n |> aggregateWindow(every: $__interval, fn: mean)\n |> yield()",
|
||||||
|
"queryType": "randomWalk",
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeRegions": [],
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "pipeline (utilization/allocation)",
|
||||||
|
"tooltip": {
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"yaxis": {
|
||||||
|
"align": false,
|
||||||
|
"alignLevel": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "5s",
|
||||||
|
"schemaVersion": 27,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"allValue": null,
|
||||||
|
"current": {
|
||||||
|
"selected": false,
|
||||||
|
"text": "cobald-xvmcqc",
|
||||||
|
"value": "cobald-xvmcqc"
|
||||||
|
},
|
||||||
|
"datasource": "InfluxDB",
|
||||||
|
"definition": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> keep(columns: [\"tardis_machine_name\", \"_time\"])\n |> sort(columns: [\"_time\"], desc: true)\n |> unique(column: \"tardis_machine_name\")",
|
||||||
|
"description": null,
|
||||||
|
"error": null,
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": null,
|
||||||
|
"multi": false,
|
||||||
|
"name": "machine",
|
||||||
|
"options": [],
|
||||||
|
"query": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> keep(columns: [\"tardis_machine_name\", \"_time\"])\n |> sort(columns: [\"_time\"], desc: true)\n |> unique(column: \"tardis_machine_name\")",
|
||||||
|
"refresh": 2,
|
||||||
|
"regex": "",
|
||||||
|
"skipUrlSync": false,
|
||||||
|
"sort": 0,
|
||||||
|
"tagValuesQuery": "",
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-5m",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "",
|
||||||
|
"title": "cobald",
|
||||||
|
"uid": "urDuvE6Gk",
|
||||||
|
"version": 2
|
||||||
|
}
|
794
roles/cobald/files/influxdb-dashboard-cobald.json
Normal file
794
roles/cobald/files/influxdb-dashboard-cobald.json
Normal file
@@ -0,0 +1,794 @@
|
|||||||
|
{
|
||||||
|
"meta": {
|
||||||
|
"version": "1",
|
||||||
|
"type": "dashboard",
|
||||||
|
"name": "cobald-Template",
|
||||||
|
"description": "template created from dashboard: cobald"
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"data": {
|
||||||
|
"type": "dashboard",
|
||||||
|
"attributes": {
|
||||||
|
"name": "cobald",
|
||||||
|
"description": ""
|
||||||
|
},
|
||||||
|
"relationships": {
|
||||||
|
"label": {
|
||||||
|
"data": []
|
||||||
|
},
|
||||||
|
"cell": {
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"type": "cell",
|
||||||
|
"id": "07900a722c363000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "cell",
|
||||||
|
"id": "07900a7236f63000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "cell",
|
||||||
|
"id": "07900a723cf63000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "cell",
|
||||||
|
"id": "07900a7243f63000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "cell",
|
||||||
|
"id": "079e694f29581000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "cell",
|
||||||
|
"id": "079e6e037c181000"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"variable": {
|
||||||
|
"data": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"included": [
|
||||||
|
{
|
||||||
|
"id": "07900a722c363000",
|
||||||
|
"type": "cell",
|
||||||
|
"attributes": {
|
||||||
|
"x": 4,
|
||||||
|
"y": 0,
|
||||||
|
"w": 4,
|
||||||
|
"h": 4
|
||||||
|
},
|
||||||
|
"relationships": {
|
||||||
|
"view": {
|
||||||
|
"data": {
|
||||||
|
"type": "view",
|
||||||
|
"id": "07900a722c363000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "07900a7236f63000",
|
||||||
|
"type": "cell",
|
||||||
|
"attributes": {
|
||||||
|
"x": 0,
|
||||||
|
"y": 0,
|
||||||
|
"w": 4,
|
||||||
|
"h": 4
|
||||||
|
},
|
||||||
|
"relationships": {
|
||||||
|
"view": {
|
||||||
|
"data": {
|
||||||
|
"type": "view",
|
||||||
|
"id": "07900a7236f63000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "07900a723cf63000",
|
||||||
|
"type": "cell",
|
||||||
|
"attributes": {
|
||||||
|
"x": 4,
|
||||||
|
"y": 4,
|
||||||
|
"w": 4,
|
||||||
|
"h": 4
|
||||||
|
},
|
||||||
|
"relationships": {
|
||||||
|
"view": {
|
||||||
|
"data": {
|
||||||
|
"type": "view",
|
||||||
|
"id": "07900a723cf63000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "07900a7243f63000",
|
||||||
|
"type": "cell",
|
||||||
|
"attributes": {
|
||||||
|
"x": 0,
|
||||||
|
"y": 4,
|
||||||
|
"w": 4,
|
||||||
|
"h": 4
|
||||||
|
},
|
||||||
|
"relationships": {
|
||||||
|
"view": {
|
||||||
|
"data": {
|
||||||
|
"type": "view",
|
||||||
|
"id": "07900a7243f63000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "079e694f29581000",
|
||||||
|
"type": "cell",
|
||||||
|
"attributes": {
|
||||||
|
"x": 8,
|
||||||
|
"y": 0,
|
||||||
|
"w": 4,
|
||||||
|
"h": 4
|
||||||
|
},
|
||||||
|
"relationships": {
|
||||||
|
"view": {
|
||||||
|
"data": {
|
||||||
|
"type": "view",
|
||||||
|
"id": "079e694f29581000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "079e6e037c181000",
|
||||||
|
"type": "cell",
|
||||||
|
"attributes": {
|
||||||
|
"x": 8,
|
||||||
|
"y": 4,
|
||||||
|
"w": 4,
|
||||||
|
"h": 4
|
||||||
|
},
|
||||||
|
"relationships": {
|
||||||
|
"view": {
|
||||||
|
"data": {
|
||||||
|
"type": "view",
|
||||||
|
"id": "079e6e037c181000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "view",
|
||||||
|
"id": "07900a722c363000",
|
||||||
|
"attributes": {
|
||||||
|
"name": "nodes running",
|
||||||
|
"properties": {
|
||||||
|
"shape": "chronograf-v2",
|
||||||
|
"queries": [
|
||||||
|
{
|
||||||
|
"text": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._field == \"state\")\n |> group()\n |> window(every: 10s)\n |> reduce(fn: (r, accumulator) => ({\n _value: accumulator._value + (\n if r._value == \"AvailableState\" then 1 \n else if r._value == \"DownState\" then -1 \n else 0)\n }), identity: {_value: 0})\n |> duplicate(column: \"_stop\", as: \"_time\")\n |> window(every: inf) //, timeColumn: \"_time\")\n |> cumulativeSum()\n// |> reduce(fn: (r, accumulator) => ({r with x: r._value * 2}), identity: {x:0})\n// |> map(fn: (r) => ({r with vnew: r._value*2}))\n// |> integral(unit: 10s, timeColumn: \"_stop\")\n// |> window(every: inf) //, timeColumn: \"_stop\")\n |> yield()",
|
||||||
|
"editMode": "advanced",
|
||||||
|
"name": "",
|
||||||
|
"builderConfig": {
|
||||||
|
"buckets": [],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "_measurement",
|
||||||
|
"values": [],
|
||||||
|
"aggregateFunctionType": "filter"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"functions": [
|
||||||
|
{
|
||||||
|
"name": "mean"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"aggregateWindow": {
|
||||||
|
"period": "auto",
|
||||||
|
"fillValues": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"axes": {
|
||||||
|
"x": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "xy",
|
||||||
|
"legend": {},
|
||||||
|
"geom": "line",
|
||||||
|
"colors": [
|
||||||
|
{
|
||||||
|
"id": "9b960932-18d9-4f57-80ba-24998a06613d",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#31C0F6",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "c1742651-0d5e-4148-b9c0-92beb642417a",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#A500A5",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "756fa8cb-9d9c-4e45-9a4f-f2b106b0216a",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#FF7E27",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"note": "",
|
||||||
|
"showNoteWhenEmpty": false,
|
||||||
|
"xColumn": "_time",
|
||||||
|
"generateXAxisTicks": [],
|
||||||
|
"xTotalTicks": 0,
|
||||||
|
"xTickStart": 0,
|
||||||
|
"xTickStep": 0,
|
||||||
|
"yColumn": "_value",
|
||||||
|
"generateYAxisTicks": [],
|
||||||
|
"yTotalTicks": 0,
|
||||||
|
"yTickStart": 0,
|
||||||
|
"yTickStep": 0,
|
||||||
|
"shadeBelow": false,
|
||||||
|
"position": "overlaid",
|
||||||
|
"timeFormat": "",
|
||||||
|
"hoverDimension": "auto",
|
||||||
|
"legendColorizeRows": true,
|
||||||
|
"legendOpacity": 1,
|
||||||
|
"legendOrientationThreshold": 100000000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "view",
|
||||||
|
"id": "07900a7236f63000",
|
||||||
|
"attributes": {
|
||||||
|
"name": "Name this Cell",
|
||||||
|
"properties": {
|
||||||
|
"shape": "chronograf-v2",
|
||||||
|
"queries": [
|
||||||
|
{
|
||||||
|
"text": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"tardis_machine_name\"] == \"678162c190d5\")\n |> window(every: 10s)\n |> count()\n |> duplicate(column: \"_stop\", as: \"_time\")\n |> window(every: inf)\n |> yield()",
|
||||||
|
"editMode": "advanced",
|
||||||
|
"name": "",
|
||||||
|
"builderConfig": {
|
||||||
|
"buckets": [],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "_measurement",
|
||||||
|
"values": [],
|
||||||
|
"aggregateFunctionType": "filter"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"functions": [
|
||||||
|
{
|
||||||
|
"name": "mean"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"aggregateWindow": {
|
||||||
|
"period": "auto",
|
||||||
|
"fillValues": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"axes": {
|
||||||
|
"x": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "xy",
|
||||||
|
"legend": {},
|
||||||
|
"geom": "line",
|
||||||
|
"colors": [
|
||||||
|
{
|
||||||
|
"id": "2566435b-7ee0-4222-8ac0-b7f14ab783d9",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#31C0F6",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "9263bcff-35a0-4025-bacd-68a1bef54784",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#A500A5",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "2f04bd8c-2203-4be6-bc34-c25720d24379",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#FF7E27",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"note": "",
|
||||||
|
"showNoteWhenEmpty": false,
|
||||||
|
"xColumn": "_time",
|
||||||
|
"generateXAxisTicks": [],
|
||||||
|
"xTotalTicks": 0,
|
||||||
|
"xTickStart": 0,
|
||||||
|
"xTickStep": 0,
|
||||||
|
"yColumn": "_value",
|
||||||
|
"generateYAxisTicks": [],
|
||||||
|
"yTotalTicks": 0,
|
||||||
|
"yTickStart": 0,
|
||||||
|
"yTickStep": 0,
|
||||||
|
"shadeBelow": false,
|
||||||
|
"position": "overlaid",
|
||||||
|
"timeFormat": "",
|
||||||
|
"hoverDimension": "auto",
|
||||||
|
"legendColorizeRows": true,
|
||||||
|
"legendOpacity": 1,
|
||||||
|
"legendOrientationThreshold": 100000000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "view",
|
||||||
|
"id": "07900a723cf63000",
|
||||||
|
"attributes": {
|
||||||
|
"name": "node fluctuation",
|
||||||
|
"properties": {
|
||||||
|
"shape": "chronograf-v2",
|
||||||
|
"queries": [
|
||||||
|
{
|
||||||
|
"text": "f = (r, accumulator) => ({\n _value: accumulator._value + (if r._value == \"AvailableState\" then 1 else if r._value == \"DownState\" then -1 else 0)\n })\n\nfrom(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._field == \"state\")\n |> group()\n |> window(every: 10s)\n |> reduce(fn: f, identity: {_value: 0})\n |> duplicate(column: \"_stop\", as: \"_time\")\n |> window(every: inf, timeColumn: \"_time\")\n |> yield()",
|
||||||
|
"editMode": "advanced",
|
||||||
|
"name": "",
|
||||||
|
"builderConfig": {
|
||||||
|
"buckets": [],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "_measurement",
|
||||||
|
"values": [],
|
||||||
|
"aggregateFunctionType": "filter"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"functions": [
|
||||||
|
{
|
||||||
|
"name": "mean"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"aggregateWindow": {
|
||||||
|
"period": "auto",
|
||||||
|
"fillValues": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"axes": {
|
||||||
|
"x": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "xy",
|
||||||
|
"legend": {},
|
||||||
|
"geom": "line",
|
||||||
|
"colors": [
|
||||||
|
{
|
||||||
|
"id": "9b960932-18d9-4f57-80ba-24998a06613d",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#31C0F6",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "c1742651-0d5e-4148-b9c0-92beb642417a",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#A500A5",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "756fa8cb-9d9c-4e45-9a4f-f2b106b0216a",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#FF7E27",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"note": "",
|
||||||
|
"showNoteWhenEmpty": false,
|
||||||
|
"xColumn": "_time",
|
||||||
|
"generateXAxisTicks": [],
|
||||||
|
"xTotalTicks": 0,
|
||||||
|
"xTickStart": 0,
|
||||||
|
"xTickStep": 0,
|
||||||
|
"yColumn": "_value",
|
||||||
|
"generateYAxisTicks": [],
|
||||||
|
"yTotalTicks": 0,
|
||||||
|
"yTickStart": 0,
|
||||||
|
"yTickStep": 0,
|
||||||
|
"shadeBelow": false,
|
||||||
|
"position": "overlaid",
|
||||||
|
"timeFormat": "",
|
||||||
|
"hoverDimension": "auto",
|
||||||
|
"legendColorizeRows": true,
|
||||||
|
"legendOpacity": 1,
|
||||||
|
"legendOrientationThreshold": 100000000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "view",
|
||||||
|
"id": "07900a7243f63000",
|
||||||
|
"attributes": {
|
||||||
|
"name": "states",
|
||||||
|
"properties": {
|
||||||
|
"shape": "chronograf-v2",
|
||||||
|
"queries": [
|
||||||
|
{
|
||||||
|
"text": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._field == \"state\")\n |> group(columns: [\"_value\"], mode: \"by\")\n |> duplicate(column: \"_value\", as: \"state\")\n |> window(every: 10s)\n |> count(column: \"state\")\n |> rename(columns: {\"_value\": \"_field\", \"state\": \"_value\"})\n |> group(columns: [\"_field\"])\n |> yield()",
|
||||||
|
"editMode": "advanced",
|
||||||
|
"name": "",
|
||||||
|
"builderConfig": {
|
||||||
|
"buckets": [],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "_measurement",
|
||||||
|
"values": [],
|
||||||
|
"aggregateFunctionType": "filter"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"functions": [
|
||||||
|
{
|
||||||
|
"name": "mean"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"aggregateWindow": {
|
||||||
|
"period": "auto",
|
||||||
|
"fillValues": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"axes": {
|
||||||
|
"x": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "xy",
|
||||||
|
"legend": {},
|
||||||
|
"geom": "line",
|
||||||
|
"colors": [
|
||||||
|
{
|
||||||
|
"id": "9b960932-18d9-4f57-80ba-24998a06613d",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#31C0F6",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "c1742651-0d5e-4148-b9c0-92beb642417a",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#A500A5",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "756fa8cb-9d9c-4e45-9a4f-f2b106b0216a",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#FF7E27",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"note": "",
|
||||||
|
"showNoteWhenEmpty": false,
|
||||||
|
"xColumn": "_stop",
|
||||||
|
"generateXAxisTicks": [],
|
||||||
|
"xTotalTicks": 0,
|
||||||
|
"xTickStart": 0,
|
||||||
|
"xTickStep": 0,
|
||||||
|
"yColumn": "_value",
|
||||||
|
"generateYAxisTicks": [],
|
||||||
|
"yTotalTicks": 0,
|
||||||
|
"yTickStart": 0,
|
||||||
|
"yTickStep": 0,
|
||||||
|
"shadeBelow": false,
|
||||||
|
"position": "overlaid",
|
||||||
|
"timeFormat": "",
|
||||||
|
"hoverDimension": "auto",
|
||||||
|
"legendColorizeRows": true,
|
||||||
|
"legendOpacity": 1,
|
||||||
|
"legendOrientationThreshold": 100000000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "view",
|
||||||
|
"id": "079e694f29581000",
|
||||||
|
"attributes": {
|
||||||
|
"name": "pipeline (demand / supply)",
|
||||||
|
"properties": {
|
||||||
|
"shape": "chronograf-v2",
|
||||||
|
"queries": [
|
||||||
|
{
|
||||||
|
"text": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"tardis_pipeline\")\n |> filter(fn: (r) => r._field == \"demand\" or r._field == \"supply\")\n// |> filter(fn: (r) => r._field == \"state\")\n// |> group()\n// |> window(every: 10s)\n// |> duplicate(column: \"_stop\", as: \"_time\")\n// |> window(every: inf) //, timeColumn: \"_time\")\n// |> cumulativeSum()\n |> yield()",
|
||||||
|
"editMode": "advanced",
|
||||||
|
"name": "",
|
||||||
|
"builderConfig": {
|
||||||
|
"buckets": [],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "_measurement",
|
||||||
|
"values": [],
|
||||||
|
"aggregateFunctionType": "filter"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"functions": [
|
||||||
|
{
|
||||||
|
"name": "mean"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"aggregateWindow": {
|
||||||
|
"period": "auto",
|
||||||
|
"fillValues": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"axes": {
|
||||||
|
"x": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "xy",
|
||||||
|
"legend": {},
|
||||||
|
"geom": "line",
|
||||||
|
"colors": [
|
||||||
|
{
|
||||||
|
"id": "4ef29481-ecf3-4a09-b0f5-e34e8d3e50b5",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#31C0F6",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "719de04a-f70f-4c54-a1bb-982a9d13dbae",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#A500A5",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "8d5c1f25-3801-4cdd-ad40-b8e2e78342a0",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#FF7E27",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"note": "",
|
||||||
|
"showNoteWhenEmpty": false,
|
||||||
|
"xColumn": "_time",
|
||||||
|
"generateXAxisTicks": [],
|
||||||
|
"xTotalTicks": 0,
|
||||||
|
"xTickStart": 0,
|
||||||
|
"xTickStep": 0,
|
||||||
|
"yColumn": "_value",
|
||||||
|
"generateYAxisTicks": [],
|
||||||
|
"yTotalTicks": 0,
|
||||||
|
"yTickStart": 0,
|
||||||
|
"yTickStep": 0,
|
||||||
|
"shadeBelow": false,
|
||||||
|
"position": "overlaid",
|
||||||
|
"timeFormat": "",
|
||||||
|
"hoverDimension": "auto",
|
||||||
|
"legendColorizeRows": true,
|
||||||
|
"legendOpacity": 1,
|
||||||
|
"legendOrientationThreshold": 100000000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "view",
|
||||||
|
"id": "079e6e037c181000",
|
||||||
|
"attributes": {
|
||||||
|
"name": "pipeline (Utilization / Allocation)",
|
||||||
|
"properties": {
|
||||||
|
"shape": "chronograf-v2",
|
||||||
|
"queries": [
|
||||||
|
{
|
||||||
|
"text": "from(bucket: \"batleth\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"tardis_pipeline\")\n |> filter(fn: (r) => r._field == \"utilisation\" or r._field == \"allocation\")\n// |> filter(fn: (r) => r._field == \"state\")\n// |> group()\n// |> window(every: 10s)\n// |> duplicate(column: \"_stop\", as: \"_time\")\n// |> window(every: inf) //, timeColumn: \"_time\")\n// |> cumulativeSum()\n |> yield()",
|
||||||
|
"editMode": "advanced",
|
||||||
|
"name": "",
|
||||||
|
"builderConfig": {
|
||||||
|
"buckets": [],
|
||||||
|
"tags": [
|
||||||
|
{
|
||||||
|
"key": "_measurement",
|
||||||
|
"values": [],
|
||||||
|
"aggregateFunctionType": "filter"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"functions": [
|
||||||
|
{
|
||||||
|
"name": "mean"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"aggregateWindow": {
|
||||||
|
"period": "auto",
|
||||||
|
"fillValues": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"axes": {
|
||||||
|
"x": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"bounds": [
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"label": "",
|
||||||
|
"prefix": "",
|
||||||
|
"suffix": "",
|
||||||
|
"base": "10",
|
||||||
|
"scale": "linear"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "xy",
|
||||||
|
"legend": {},
|
||||||
|
"geom": "line",
|
||||||
|
"colors": [
|
||||||
|
{
|
||||||
|
"id": "4ef29481-ecf3-4a09-b0f5-e34e8d3e50b5",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#31C0F6",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "719de04a-f70f-4c54-a1bb-982a9d13dbae",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#A500A5",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "8d5c1f25-3801-4cdd-ad40-b8e2e78342a0",
|
||||||
|
"type": "scale",
|
||||||
|
"hex": "#FF7E27",
|
||||||
|
"name": "Nineteen Eighty Four",
|
||||||
|
"value": 0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"note": "",
|
||||||
|
"showNoteWhenEmpty": false,
|
||||||
|
"xColumn": "_time",
|
||||||
|
"generateXAxisTicks": [],
|
||||||
|
"xTotalTicks": 0,
|
||||||
|
"xTickStart": 0,
|
||||||
|
"xTickStep": 0,
|
||||||
|
"yColumn": "_value",
|
||||||
|
"generateYAxisTicks": [],
|
||||||
|
"yTotalTicks": 0,
|
||||||
|
"yTickStart": 0,
|
||||||
|
"yTickStep": 0,
|
||||||
|
"shadeBelow": false,
|
||||||
|
"position": "overlaid",
|
||||||
|
"timeFormat": "",
|
||||||
|
"hoverDimension": "auto",
|
||||||
|
"legendColorizeRows": true,
|
||||||
|
"legendOpacity": 1,
|
||||||
|
"legendOrientationThreshold": 100000000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"labels": []
|
||||||
|
}
|
6
roles/cobald/files/influxdb.repo
Normal file
6
roles/cobald/files/influxdb.repo
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# Yum repository definition for the packages published on repos.influxdata.com.
# $releasever and $basearch are yum variables that yum expands itself. They must
# not be backslash-escaped in a static repo file (that escaping is only needed
# when this text is written through a shell here-document).
[influxdb]
name = InfluxDB Repository - RHEL $releasever
baseurl = https://repos.influxdata.com/rhel/$releasever/$basearch/stable
enabled = 1
gpgcheck = 1
gpgkey = https://repos.influxdata.com/influxdb.key
|
5
roles/cobald/files/init-cobaldmodules.sh
Normal file
5
roles/cobald/files/init-cobaldmodules.sh
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
#!/bin/sh
# Install the cobaldmodules package in editable mode, but only when a source
# tree (setup.py plus the cobaldmodules package directory) is present below
# /usr/local/lib/cobaldmodules. Dependencies are not installed (--no-deps).

[ -f /usr/local/lib/cobaldmodules/setup.py ] \
    && [ -d /usr/local/lib/cobaldmodules/cobaldmodules ] \
    && pip3 install --no-deps --editable /usr/local/lib/cobaldmodules
|
31
roles/cobald/files/slurm-slurmd.def
Normal file
31
roles/cobald/files/slurm-slurmd.def
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# Singularity definition for the slurmd drone image, built on top of the
# docker image slurm:slurmd.
Bootstrap: docker-daemon
From: slurm:slurmd

%files
31-slurmd-configless /etc/docker-init.d/31-slurm-configless
/container/volumes/munge/munge.key /etc/munge/munge.key
cgroup.conf.noautomount /etc/slurm/cgroup.conf

%post
    # Swap the stock slurmd init script for the one copied in %files.
    rm /etc/docker-init.d/30-slurmd
    chmod 755 /etc/docker-init.d/31-slurm-configless

%startscript
    # Both positional arguments are required: <slurmctld> <nodename>.
    if [ -z "${1}" ] || [ -z "${2}" ]; then
        echo "undefined variables slurmctld or nodename"
        exit 1
    fi
    slurmctld="${1}"
    nodename="${2}"
    export slurmctld nodename
    echo ${slurmctld} ${nodename} ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
    exec /usr/local/sbin/entrypoint.sh /usr/local/sbin/docker-init

%runscript
    # Same contract as %startscript: <slurmctld> <nodename>.
    if [ -z "${1}" ] || [ -z "${2}" ]; then
        echo "undefined variables slurmctld or nodename"
        exit 1
    fi
    slurmctld="${1}"
    nodename="${2}"
    export slurmctld nodename
    echo ${slurmctld} ${nodename} ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
    exec /usr/local/sbin/entrypoint.sh /usr/local/sbin/docker-init
|
59
roles/cobald/files/start-drone
Normal file
59
roles/cobald/files/start-drone
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
#!/bin/sh
#SBATCH -D /shared
#
# Batch script that runs a nested slurmd ("drone") inside a singularity
# instance for the lifetime of the job:
#   1. build a private, read-only cgroup tree under /inner-cgroup that only
#      exposes /sys/fs/cgroup/freezer/slurm,
#   2. start the singularity instance "slurm-drone" from /shared/slurmd.sif
#      and register it with the controller,
#   3. sleep for (60 * SLURM_Walltime - 2) seconds,
#   4. drain the drone, cancel its jobs and tear everything down again.
#
# Environment read: SLURM_Walltime (multiplied by 60 before sleeping, so
# presumably minutes -- TODO confirm) and TardisDroneUuid.

# Dump environment and arguments into the job output for debugging.
export
echo $@

# Node name is "drone" followed by the hostname starting at its first digit.
nodename=$(hostname | awk '{ print "drone" substr($1,match($1, "([[:digit:]]+)")) }')

SHUTDOWN_DONE=0

# Drain the drone node, cancel its jobs, stop the container and undo the
# cgroup mounts. Called explicitly at the end of the script and again from the
# EXIT trap; SHUTDOWN_DONE makes the second invocation a no-op.
handler_quit() {
    [ $SHUTDOWN_DONE -ne 0 ] && return
    set -x
    echo "drain container"
    scontrol update NodeName=${nodename} State=DRAIN Reason="cobald node quit"
    shutdown_jobs=$(squeue -w ${nodename} --noheader -O jobid)
    [ -n "${shutdown_jobs}" ] && scancel ${shutdown_jobs}
    #scancel -w ${nodename}

    # Wait until the cancelled jobs are gone, but at most (KillWait - 2)
    # seconds: keep looping only while jobs remain AND time is left.
    i=$(( $(scontrol show config | grep KillWait | \
        sed 's/^KillWait.*= \([0-9]*\) sec/\1/') - 2 ))
    while [ -n "$(squeue -w ${nodename} --noheader -O jobid)" ] && [ "${i}" -ge 1 ]
    do
        i=$(( ${i} - 1 ))
        sleep 1
    done
    scancel -s KILL -w ${nodename} # hard kill all remaining jobs

    echo "shutdown container"
    scontrol update NodeName=${nodename} State=DOWN Reason=shutdown
    singularity instance stop slurm-drone
    scontrol update NodeName=${nodename} State=FUTURE
    umount /inner-cgroup/freezer
    umount /inner-cgroup
    SHUTDOWN_DONE=1
    exit 0
}

# set -x

trap handler_quit EXIT

echo "mounting cgroups"
# -p: the mount point is not removed on shutdown, so it may already exist.
mkdir -p /inner-cgroup
mount -t tmpfs none /inner-cgroup
mkdir /inner-cgroup/freezer/
mount --bind /sys/fs/cgroup/freezer/slurm/ /inner-cgroup/freezer/
mount -o remount,ro /inner-cgroup

echo "starting ${nodename}"
scontrol update NodeName=${nodename} State=RESUME # revoke last DRAIN
scontrol update NodeName=${nodename} State=FUTURE
singularity instance start \
    -B /inner-cgroup/:/sys/fs/cgroup/ \
    --writable-tmpfs /shared/slurmd.sif slurm-drone \
    slurm-ctl ${nodename}
# scontrol update NodeName=${nodename} NodeHostname=${SLURM_JOB_ID}
scontrol update NodeName=${nodename} NodeHostname=${TardisDroneUuid}
# NOTE(review): $? below is the status of the scontrol call above, not of
# "singularity instance start" -- confirm this is the intended check.
if [ $? -eq 0 ] ; then
    echo "container started, sleeping $(( 60 * ${SLURM_Walltime} - 2 ))"
    sleep $(( 60 * ${SLURM_Walltime} - 2 ))
fi
handler_quit
|
8
roles/cobald/files/telegraf.Dockerfile
Normal file
8
roles/cobald/files/telegraf.Dockerfile
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# Telegraf image based on CentOS 7; telegraf is installed with yum from the
# repository configured by influxdb.repo.
FROM centos:7

COPY influxdb.repo /etc/yum.repos.d/influxdb.repo

# Install and clean the yum caches in the same layer so they are not stored.
RUN yum -y install telegraf && \
    yum clean all && rm -rf /var/cache/yum

# Exec form: telegraf is started directly instead of through "/bin/sh -c",
# so it is the process that receives the container's stop signal.
CMD ["telegraf"]
|
11
roles/cobald/library/TODO.md
Normal file
11
roles/cobald/library/TODO.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Tests
|
||||||
|
ANSIBLE_LIBRARY=. ansible -m influx_bucket -a "base='http://192.168.122.140:28086' org='my-org' auth_token='87-fEnSlQldFi1T_CLHsrHxH-T9VKey-qzUbVH6tmR2QzL4oZzbUPwzS1wzOoIkyfmyGbRv75yLjYfztxziivw==' name='bucky' description='test 123'" localhost -vvv
|
||||||
|
ANSIBLE_LIBRARY=. ansible -m influx_token -a "base='http://192.168.122.140:28086' org='my-org' auth_token='87-fEnSlQldFi1T_CLHsrHxH-T9VKey-qzUbVH6tmR2QzL4oZzbUPwzS1wzOoIkyfmyGbRv75yLjYfztxziivw==' key='foo' description='test 123' permissions=\"{{'[{\\\"action\\\": \\\"write\\\",\\\"resource\\\": {\\\"type\\\": \\\"buckets\\\"} }]'|from_json}}\"" localhost -vvv
|
||||||
|
ANSIBLE_LIBRARY=. ansible -m influx_dashboard -create -a "base='http://192.168.122.140:28086' org='my-org' token='2Mji-PvTzgn2oie5p36pJ-vxqWCnxczMWGrnYz2nUHj6Q6XvdIGiLPmK4DjX16KGhOjxQ5dWymDusE8qJrhFFg==' data='{{lookup(\"file\", \"../files/influxdb-dashboard-cobald.json\")}}'" localhost -vvv
|
||||||
|
Missing: a lot of cases, e.g. missing tokens and invalid data such as `bucket` instead of `buckets`, etc.
|
||||||
|
|
||||||
|
# TODO
|
||||||
|
* tests
|
||||||
|
* state (present/absent)
|
||||||
|
* `module_utils/urls.py` (https://github.com/ansible/ansible/blob/devel/lib/ansible/module_utils/urls.py)
|
||||||
|
* see module notes
|
@@ -1,58 +0,0 @@
|
|||||||
- yum:
|
|
||||||
name: git
|
|
||||||
state: present
|
|
||||||
|
|
||||||
- git:
|
|
||||||
repo: https://github.com/thoto/cobald
|
|
||||||
dest: "~{{unpriv_user}}/cobald-src"
|
|
||||||
version: bugfix/mixed_construction_methods
|
|
||||||
become: yes
|
|
||||||
become_user: "{{unpriv_user}}"
|
|
||||||
register: cobald_git_pull
|
|
||||||
|
|
||||||
- git:
|
|
||||||
repo: https://github.com/MatterMiners/tardis
|
|
||||||
dest: "~{{unpriv_user}}/tardis-src"
|
|
||||||
version: master
|
|
||||||
become: yes
|
|
||||||
become_user: "{{unpriv_user}}"
|
|
||||||
register: tardis_git_pull
|
|
||||||
|
|
||||||
- name: "get unpriv_user {{unpriv_user}} uid and gid"
|
|
||||||
getent:
|
|
||||||
database: passwd
|
|
||||||
key: "{{unpriv_user}}"
|
|
||||||
|
|
||||||
- name: run pip install
|
|
||||||
docker_container:
|
|
||||||
image: cobald
|
|
||||||
name: "cobald-src-{{item.name}}-install"
|
|
||||||
volumes:
|
|
||||||
- "~{{unpriv_user}}/{{item.name}}-src:/usr/local/src/{{item.name}}:rw"
|
|
||||||
state: started
|
|
||||||
detach: False
|
|
||||||
cleanup: True
|
|
||||||
user: "{{getent_passwd[unpriv_user][1]}}:{{getent_passwd[unpriv_user][2]}}"
|
|
||||||
entrypoint: ""
|
|
||||||
command: |
|
|
||||||
bash -c 'HOME=/tmp pip3 install --editable /usr/local/src/{{item.name}}'
|
|
||||||
with_items:
|
|
||||||
- name: cobald
|
|
||||||
run: "{{cobald_git_pull.changed}}"
|
|
||||||
- name: tardis
|
|
||||||
run: "{{tardis_git_pull.changed}}"
|
|
||||||
when: item.run
|
|
||||||
|
|
||||||
- docker_container:
|
|
||||||
name: cobald-dev
|
|
||||||
image: cobald
|
|
||||||
volumes:
|
|
||||||
- "~{{unpriv_user}}/cobald:/etc/cobald"
|
|
||||||
- "~{{unpriv_user}}/cobald/modules:/usr/local/src/cobaldmodules"
|
|
||||||
- "~{{unpriv_user}}/cobald-src:/usr/local/src/cobald:ro"
|
|
||||||
- "~{{unpriv_user}}/tardis-src:/usr/local/src/tardis:ro"
|
|
||||||
state: started
|
|
||||||
detach: True
|
|
||||||
cleanup: True
|
|
||||||
interactive: True
|
|
||||||
command: /bin/bash
|
|
34
roles/cobald/tasks/dockerimage-generic.yml
Normal file
34
roles/cobald/tasks/dockerimage-generic.yml
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Build the local "cobald" docker image.
# The build context is staged below /container/docker-images/cobald.<tag>/
# (tag = cobald_image_tag, default "latest"); the image is rebuilt whenever the
# templated Dockerfile or one of the copied helper scripts changed.
- name: create cobald image build directory
  file:
    path: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/"
    state: directory
    owner: "{{unpriv_user}}"
    group: docker

- name: template cobald Dockerfile into build directory
  template:
    src: cobald.Dockerfile
    dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/Dockerfile"
    owner: "{{unpriv_user}}"
    group: docker
  register: cobald_cp_dockerfile

- name: copy cobald helper scripts into build directory
  copy:
    src: "{{item}}"
    dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/{{item}}"
    owner: "{{unpriv_user}}"
    group: docker
    # Quoted so the mode is passed as an octal string, not a YAML integer.
    mode: "0755"
  with_items:
    - cobald-entrypoint.sh
    - init-cobaldmodules.sh
  register: cobald_cp_files

- name: build cobald docker image
  docker_image:
    name: "cobald"
    tag: "{{cobald_image_tag|default('latest')}}"
    # pull: False
    build:
      pull: false
      path: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/"
    source: build
    # Force a rebuild only when the build context actually changed.
    force_source: "{{cobald_cp_dockerfile.changed or cobald_cp_files.changed}}"
|
|
11
roles/cobald/tasks/facts.yml
Normal file
11
roles/cobald/tasks/facts.yml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Determine the hostname for the cobald container unless one is already set:
# reuse the hostname reported for the existing container named
# container_name, otherwise fall back to "cobald-" plus a generated
# six-character lowercase suffix.
# NOTE(review): indentation was lost in the reviewed copy; the condition is
# taken to guard the whole block - confirm against the repository.
- when: cobald_container_hostname is not defined
  block:
    - register: cobald_container_info
      docker_container_info:
        name: "{{ container_name | mandatory }}"

    - set_fact:
        cobald_container_hostname: |-
          {{cobald_container_info.container.Config.Hostname | default('cobald-'+
          lookup('password', '/dev/null chars=ascii_lowercase length=6')) }}
|
||||||
|
|
54
roles/cobald/tasks/grafana.yml
Normal file
54
roles/cobald/tasks/grafana.yml
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
# Create a read-only token (all buckets) that grafana uses to query InfluxDB.
# The token is registered as influx_grafana_token.
- name: create influx token for grafana
  influx_token:
    base: "http://localhost:{{influx_pubport}}"
    # Use the configured organisation (the one InfluxDB is initialised with
    # via DOCKER_INFLUXDB_INIT_ORG) instead of a fixed "my-org", which only
    # works when influx_org happens to have that value.
    org: "{{influx_org}}"
    auth_token: "{{influx_admin_token}}"
    description: grafana read access
    key: grafana
    permissions:
      - action: read
        resource:
          type: buckets
  register: influx_grafana_token
|
||||||
|
|
||||||
|
# Run grafana on the cobald docker network and publish its web port 3000.
- name: run grafana
  docker_container:
    name: ed-grafana
    image: docker.io/grafana/grafana:7.5.7
    hostname: ed-grafana
    # Follow the domain configured for the other cobald containers
    # (cobald_domainname); the former fixed value remains the fallback.
    domainname: "{{ cobald_domainname | default('cobald.local') }}"
    networks:
      - name: "{{cobald_docker_network}}"
    networks_cli_compatible: True
    published_ports:
      - "3000:3000"
    state: started
    detach: True
    cleanup: True
|
||||||
|
|
||||||
|
# Block until grafana's published port accepts connections.
- wait_for:
    port: 3000
    host: localhost
|
||||||
|
|
||||||
|
# Register InfluxDB as a Flux datasource in grafana, authenticating with the
# read token created for grafana.
- community.grafana.grafana_datasource:
    name: InfluxDB
    ds_type: influxdb
    ds_url: "{{influx_url}}"
    grafana_url: http://localhost:3000
    grafana_user: admin
    grafana_password: admin
    additional_json_data:
      version: Flux
      organization: "{{influx_org}}"
      defaultBucket: "{{influx_bucket}}"
    additional_secure_json_data:
      token: "{{influx_grafana_token.token}}"
|
||||||
|
|
||||||
|
# Upload the dashboard shipped as grafana-dashboard.json, overwriting an
# existing dashboard.
- community.grafana.grafana_dashboard:
    state: present
    overwrite: true
    commit_message: updated by ansible
    json_data: "{{lookup('file', 'grafana-dashboard.json')|from_json}}"
    grafana_url: http://localhost:3000
    grafana_user: admin
    grafana_password: admin
|
76
roles/cobald/tasks/influxdb.yml
Normal file
76
roles/cobald/tasks/influxdb.yml
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
# Start InfluxDB 2.0 on the cobald docker network. The DOCKER_INFLUXDB_INIT_*
# variables request the image's automated setup with the configured admin
# user, password and organisation and an initial bucket named "my-bucket".
- name: run influxdb in docker container
  docker_container:
    name: ed-influxdb
    image: docker.io/library/influxdb:2.0
    state: started
    detach: true
    cleanup: true
    hostname: "{{cobald_influx_hostname}}"
    domainname: "{{cobald_domainname}}"
    networks_cli_compatible: true
    networks:
      - name: "{{ cobald_docker_network }}"
    published_ports:
      - "{{influx_pubport}}:8086"
    volumes:
      - "ed-influxdb-data:/var/lib/influxdb2"
      - "ed-influxdb-config:/etc/influxdb2"
      - "/container/volumes/influxdb-backup/:/backup"
    env:
      DOCKER_INFLUXDB_INIT_MODE: "setup"
      DOCKER_INFLUXDB_INIT_USERNAME: "{{influx_admin_user}}"
      DOCKER_INFLUXDB_INIT_PASSWORD: "{{influx_admin_pw}}"
      DOCKER_INFLUXDB_INIT_ORG: "{{influx_org}}"
      DOCKER_INFLUXDB_INIT_BUCKET: "my-bucket"
|
||||||
|
|
||||||
|
# Add the container as an in-memory inventory host reached through the docker
# connection plugin, with the docker client pointed at the managed host via
# ssh. Never reported as changed.
- name: add ansible connection to influxdb container
  changed_when: false
  add_host:
    name: ed-influxdb
    ansible_connection: docker
    ansible_docker_extra_args: "-H=ssh://{{ansible_host}}"
|
||||||
|
|
||||||
|
# Poll InfluxDB's HTTP port from inside the container until it answers.
# The polling is done by Ansible (until/retries/delay) rather than by an
# endless shell loop, so a container that never comes up fails the play after
# about two minutes instead of hanging it forever.
- name: wait for influx to run
  raw: curl --silent --output /dev/null http://localhost:8086
  register: influx_wait
  until: influx_wait.rc == 0
  retries: 120
  delay: 1
  changed_when: False
  delegate_to: ed-influxdb
|
||||||
|
|
||||||
|
# List the admin user's authorisations as JSON inside the container; the
# output is registered as influx_token_fetch for the following set_fact.
- name: fetch influxdb auth token
  # Query the user the container was initialised with
  # (DOCKER_INFLUXDB_INIT_USERNAME) instead of a fixed "my-user", which
  # returns nothing when influx_admin_user has any other value.
  raw: "influx auth list --user {{influx_admin_user}} --hide-headers --json"
  register: influx_token_fetch
  changed_when: False
  delegate_to: ed-influxdb
|
||||||
|
|
||||||
|
# Take the token of the first authorisation in the fetched JSON list and keep
# it as influx_admin_token.
- name: set influxdb admin token
  set_fact:
    influx_admin_token: "{{(influx_token_fetch.stdout | from_json | first).token}}"
|
||||||
|
|
||||||
|
# Create the bucket cobald/tardis metrics are written to.
- name: create influxdb bucket for cobald
  influx_bucket:
    base: "http://localhost:{{influx_pubport}}"
    # Use the configured organisation (the one InfluxDB is initialised with
    # via DOCKER_INFLUXDB_INIT_ORG) instead of a fixed "my-org".
    org: "{{influx_org}}"
    auth_token: "{{influx_admin_token}}"
    name: "{{influx_bucket}}"
|
||||||
|
|
||||||
|
# Upload the cobald dashboard definition to InfluxDB. Can be switched off by
# setting influxdb_dashboard to a false value (defaults to enabled).
- name: create influxdb dashboard
  influx_dashboard:
    base: "http://localhost:{{influx_pubport}}"
    # Use the configured organisation (the one InfluxDB is initialised with
    # via DOCKER_INFLUXDB_INIT_ORG) instead of a fixed "my-org".
    org: "{{influx_org}}"
    auth_token: "{{influx_admin_token}}"
    data: "{{lookup('file', 'influxdb-dashboard-cobald.json')}}"
  when: influxdb_dashboard | default(True)
|
||||||
|
|
||||||
|
# Create a token that may only write to the cobald bucket; it is registered
# as influx_telegraf_token.
- name: create influxdb write access token for telegraf
  influx_token:
    base: "http://localhost:{{influx_pubport}}"
    # Use the configured organisation (the one InfluxDB is initialised with
    # via DOCKER_INFLUXDB_INIT_ORG) instead of a fixed "my-org".
    org: "{{influx_org}}"
    auth_token: "{{influx_admin_token}}"
    description: cobald tardis telegraf monitoring plugin
    key: telegraf_cobaldtardis
    permissions:
      - action: write
        resource:
          type: buckets
          name: "{{influx_bucket}}"
  register: influx_telegraf_token
|
@@ -1,53 +1,149 @@
|
|||||||
- file:
|
- include_vars: cobald-slurm.yml
|
||||||
path: "/container/{{item}}/cobald/"
|
when: cobald_slurm | default(False)
|
||||||
|
tags: always
|
||||||
|
|
||||||
|
# Slurm variant only: build the cobald image through the slurm role's
# dockerimage tasks. The rendered cobald.Dockerfile is passed as "dockerfile"
# and "files" is assembled as a list of {dest, content} entries, one per name
# in files_list.
- name: build cobald:slurm docker image
  when: cobald_slurm | default(False)
  include_role:
    tasks_from: dockerimage
    name: slurm
  vars:
    image_name: "{{cobald_image_tag}}"
    slurm_image_prefix: cobald
    dockerfile: "{{ lookup('template', 'cobald.Dockerfile') }}"
    files_list:
      - cobald-entrypoint.sh
      - init-cobaldmodules.sh
      - start-drone
      - 28-sync-container-slurmd
    files: "
      {%- set files = [] -%} {%- for i in files_list -%}
      {%- set files = files.append(
      { 'dest': i, 'content': lookup('file', i) }) -%}
      {%- endfor %}{{ files }}"
|
||||||
|
|
||||||
|
# Non-slurm variant: build the image with this role's own generic tasks.
- name: build generic cobald docker image
  when: not (cobald_slurm | default(False))
  include_tasks: dockerimage-generic.yml
|
||||||
|
|
||||||
|
- name: make cobald data volume
|
||||||
|
file:
|
||||||
|
path: "/container/volumes/cobald/"
|
||||||
state: directory
|
state: directory
|
||||||
owner: "{{unpriv_user}}"
|
owner: "{{unpriv_user}}"
|
||||||
group: docker
|
group: docker
|
||||||
loop:
|
|
||||||
- docker-images
|
|
||||||
- volumes
|
|
||||||
|
|
||||||
- copy:
|
- name: copy cobald config
|
||||||
src: cobald.Dockerfile
|
copy:
|
||||||
dest: /container/docker-images/cobald/Dockerfile
|
|
||||||
owner: "{{unpriv_user}}"
|
|
||||||
group: docker
|
|
||||||
register: cobald_cp_dockerfile
|
|
||||||
|
|
||||||
- copy:
|
|
||||||
src: cobald-entrypoint.sh
|
|
||||||
dest: /container/docker-images/cobald/cobald-entrypoint.sh
|
|
||||||
owner: "{{unpriv_user}}"
|
|
||||||
group: docker
|
|
||||||
mode: 0755
|
|
||||||
register: cobald_cp_files
|
|
||||||
|
|
||||||
- docker_image:
|
|
||||||
name: "cobald"
|
|
||||||
# pull: False
|
|
||||||
build:
|
|
||||||
pull: False
|
|
||||||
path: "/container/docker-images/cobald/"
|
|
||||||
source: build
|
|
||||||
force_source: "{{cobald_cp_dockerfile.changed or cobald_cp_files.changed}}"
|
|
||||||
|
|
||||||
- copy:
|
|
||||||
src: cobald-config/
|
src: cobald-config/
|
||||||
dest: /container/volumes/cobald
|
dest: "~{{unpriv_user}}/cobald/"
|
||||||
|
force: False
|
||||||
owner: "{{unpriv_user}}"
|
owner: "{{unpriv_user}}"
|
||||||
group: docker
|
group: docker
|
||||||
when: False
|
mode: "0644"
|
||||||
|
|
||||||
|
# The containers started by this role attach to this docker network.
- name: ensure network for cobald container exists
  docker_network:
    state: present
    name: "{{cobald_docker_network}}"
|
||||||
|
|
||||||
# docker run -v $(pwd)/cobald-config-host:/etc/cobald -v $(pwd)/cobald:/cobald --rm -it cobald bash
|
# docker run -v $(pwd)/cobald-config-host:/etc/cobald -v $(pwd)/cobald:/cobald --rm -it cobald bash
|
||||||
|
|
||||||
- docker_container:
|
- name: install git
|
||||||
name: cobald
|
yum:
|
||||||
image: cobald
|
name: git
|
||||||
|
state: present
|
||||||
|
|
||||||
|
# Create the configuration directory and its modules subdirectory in the
# unprivileged user's home; both are later bind-mounted into the container.
- name: make directories for cobald configuration and modules
  file:
    state: directory
    path: "{{item}}"
    mode: "a=rx,u=rwx"
    owner: "{{unpriv_user}}"
    group: "{{unpriv_user}}"
  loop:
    - "~{{unpriv_user}}/cobald/modules"
    - "~{{unpriv_user}}/cobald"
|
||||||
|
|
||||||
|
# Check out the cobald sources as the unprivileged user. An existing checkout
# is not updated; the registered result decides whether pip install runs.
- name: clone cobald code from git
  become: true
  become_user: "{{unpriv_user}}"
  git:
    repo: https://github.com/thoto/cobald
    version: bugfix/mixed_construction_methods
    dest: "~{{unpriv_user}}/cobald-src"
    update: false  # FIXME
  register: cobald_git_pull
|
||||||
|
|
||||||
|
# Check out the tardis sources as the unprivileged user. An existing checkout
# is not updated; the registered result decides whether pip install runs.
- name: clone tardis code from git
  become: true
  become_user: "{{unpriv_user}}"
  git:
    repo: https://github.com/MatterMiners/tardis
    version: master
    dest: "~{{unpriv_user}}/tardis-src"
    update: false  # FIXME
  register: tardis_git_pull
|
||||||
|
|
||||||
|
# Load the user's passwd entry into getent_passwd; the pip-install container
# reads the uid and gid from it.
- name: "get unpriv_user {{unpriv_user}} uid and gid"
  getent:
    key: "{{unpriv_user}}"
    database: passwd
|
||||||
|
|
||||||
|
- name: run pip install on cobald and tardis
|
||||||
|
docker_container:
|
||||||
|
image: "cobald:{{cobald_image_tag|default('latest')}}"
|
||||||
|
name: "cobald-src-{{item.name}}-install"
|
||||||
volumes:
|
volumes:
|
||||||
- /container/volumes/cobald:/etc/cobald:ro
|
- "~{{unpriv_user}}/{{item.name}}-src:/usr/local/src/{{item.name}}:rw"
|
||||||
|
state: started
|
||||||
|
detach: False
|
||||||
|
cleanup: True
|
||||||
|
user: "{{getent_passwd[unpriv_user][1]}}:{{getent_passwd[unpriv_user][2]}}"
|
||||||
|
entrypoint: ""
|
||||||
|
command: |
|
||||||
|
bash -c 'HOME=/tmp pip3 install --editable /usr/local/src/{{item.name}}'
|
||||||
|
with_items:
|
||||||
|
- name: cobald
|
||||||
|
run: "{{cobald_git_pull.changed}}"
|
||||||
|
- name: tardis
|
||||||
|
run: "{{tardis_git_pull.changed}}"
|
||||||
|
when: item.run
|
||||||
|
|
||||||
|
# Set up the monitoring side: telegraf.yml builds and runs telegraf and itself
# imports influxdb.yml and grafana.yml.
- import_tasks: telegraf.yml
|
||||||
|
|
||||||
|
# Derive cobald_container_hostname via facts.yml unless the caller already
# provided one.
- name: get cobald hostname
  when: cobald_container_hostname is not defined
  include_tasks: facts.yml
|
||||||
|
|
||||||
|
# Include the singularity image build. The tag is set on the include itself
# and, through "apply", on every task that gets included.
- name: build singularity container
  tags: singularity
  include_tasks:
    file: singularity.yml
    apply:
      tags: singularity
|
||||||
|
|
||||||
|
- name: run cobald container
|
||||||
|
docker_container:
|
||||||
|
name: "{{ container_name | default('cobald') }}"
|
||||||
|
image: "cobald:{{cobald_image_tag|default('latest')}}"
|
||||||
|
hostname: "{{cobald_container_hostname}}"
|
||||||
|
domainname: "{{ cobald_domainname | default('cobald.local')}}"
|
||||||
|
volumes: "{{default_mounts + cobald_mounts }}"
|
||||||
|
networks:
|
||||||
|
- name: "{{cobald_docker_network}}"
|
||||||
|
networks_cli_compatible: True
|
||||||
state: started
|
state: started
|
||||||
detach: True
|
detach: True
|
||||||
cleanup: True
|
cleanup: True
|
||||||
when: False
|
interactive: True
|
||||||
|
# command: python3 -m cobald.daemon /etc/cobald/config.yaml
|
||||||
- include_tasks: dev.yml
|
vars:
|
||||||
|
default_mounts: "{{cobald_slurm_mounts | default([])}}"
|
||||||
|
cobald_mounts:
|
||||||
|
- "~{{unpriv_user}}/cobald:/etc/cobald"
|
||||||
|
# - /container/volumes/cobald:/etc/cobald:ro
|
||||||
|
- "/container/docker-images/sing-slurmd/build/:/slurm-singimage/:ro"
|
||||||
|
- "~{{unpriv_user}}/cobald/modules:/usr/local/src/cobaldmodules"
|
||||||
|
- "~{{unpriv_user}}/cobald-src:/usr/local/src/cobald:ro"
|
||||||
|
- "~{{unpriv_user}}/tardis-src:/usr/local/src/tardis:ro"
|
||||||
|
48
roles/cobald/tasks/singularity.yml
Normal file
48
roles/cobald/tasks/singularity.yml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Pull in the singularity role before building the image.
- name: setup singularity
  tags: singularity
  import_role:
    name: singularity
|
||||||
|
|
||||||
|
# Working directories for the singularity build: the definition directory, a
# cache/temp directory and the output directory for the built image.
- name: make singularity image build directory
  file:
    path: "{{item}}"
    state: directory
    mode: "0755"
    group: "docker"
    owner: "{{unpriv_user}}"
  loop:
    - /container/docker-images/sing-slurmd
    - /container/docker-images/sing-slurmd/cache
    - /container/docker-images/sing-slurmd/build
|
||||||
|
|
||||||
|
# Copy the definition file and the files shipped next to it. The result is
# registered so a stale image can be removed when any input changed.
- name: copy slurm singularity container files
  register: cobald_copy_sing_files
  copy:
    dest: "/container/docker-images/sing-slurmd/{{item}}"
    src: "{{item}}"
    group: "docker"
    owner: "{{unpriv_user}}"
  loop:
    - slurm-slurmd.def
    - 31-slurmd-configless
    - cgroup.conf.noautomount
|
||||||
|
|
||||||
|
# Delete the previously built image when its inputs changed, so that the
# "creates" guard of the build task lets the build run again.
- name: remove old container
  when: cobald_copy_sing_files.changed
  file:
    state: absent
    path: /container/docker-images/sing-slurmd/build/slurmd.sif
|
||||||
|
|
||||||
|
# Build slurmd.sif from the definition file, using the cache directory as
# SINGULARITY_TMPDIR. Skipped while the image file already exists ("creates").
- name: build container
  register: cobald_sing_build
  shell:
    creates: /container/docker-images/sing-slurmd/build/slurmd.sif
    chdir: /container/docker-images/sing-slurmd/
    cmd: >-
      SINGULARITY_TMPDIR=/container/docker-images/sing-slurmd/cache
      singularity build --disable-cache
      /container/docker-images/sing-slurmd/build/slurmd.sif
      /container/docker-images/sing-slurmd/slurm-slurmd.def
|
||||||
|
|
||||||
|
# Show the build output; runs only when the "debug" tag is requested
# explicitly, because of the "never" tag.
- debug:
    msg: "{{[cobald_sing_build.stdout, cobald_sing_build.stderr]}}"
  tags:
    - never
    - debug
|
||||||
|
|
||||||
|
# TODO: trigger copy in cobald container when slurmd.sif rebuilt
|
69
roles/cobald/tasks/telegraf.yml
Normal file
69
roles/cobald/tasks/telegraf.yml
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# Create /container/docker-images/telegraf/ (build context) and
# /container/volumes/telegraf/ (runtime configuration).
- name: setup directories for telegraf
  file:
    state: directory
    path: "/container/{{item}}/telegraf/"
    group: docker
    owner: "{{unpriv_user}}"
  loop:
    - docker-images
    - volumes
|
||||||
|
|
||||||
|
# Place the Dockerfile in the build context; the result is registered so the
# image build can be forced when it changed.
- name: copy telegraf Dockerfile
  register: cobald_cp_telegraf_dockerfile
  copy:
    dest: /container/docker-images/telegraf/Dockerfile
    src: telegraf.Dockerfile
    group: docker
    owner: "{{unpriv_user}}"
|
||||||
|
|
||||||
|
# Place the yum repo definition in the build context. The result is
# registered so that a changed repo file also forces an image rebuild.
- name: copy telegraf repo file
  copy:  # telegraf is found in influxdb repo
    src: influxdb.repo
    dest: /container/docker-images/telegraf/influxdb.repo
    owner: "{{unpriv_user}}"
    group: docker
  register: cobald_cp_telegraf_repo

# Build the ed-telegraf image from the build context.
- name: docker image for telegraf
  docker_image:
    name: "ed-telegraf"
    build:
      pull: False
      path: "/container/docker-images/telegraf/"
    source: build
    # Rebuild when any file of the build context changed, not only the
    # Dockerfile (presumably the Dockerfile installs influxdb.repo into the
    # image - confirm).
    force_source: "{{cobald_cp_telegraf_dockerfile.changed or cobald_cp_telegraf_repo.changed}}"
|
||||||
|
|
||||||
|
# InfluxDB must be set up first: the telegraf configuration below uses the
# write token registered there (influx_telegraf_token).
- import_tasks: influxdb.yml
  tags:
    - influxdb
|
||||||
|
|
||||||
|
# Render telegraf.conf with the write token and the InfluxDB URL on the
# docker network. The result is registered so the container can be recreated
# when the configuration changed.
- name: generate telegraf config
  register: telegraf_config_gen
  vars:
    influx_url: "http://{{cobald_influx_hostname}}:8086"
    influx_token: "{{influx_telegraf_token.token}}"
  template:
    dest: /container/volumes/telegraf/telegraf.conf
    src: telegraf.conf.j2
    group: docker
    owner: "{{unpriv_user}}"
|
||||||
|
|
||||||
|
# Run telegraf on the cobald docker network with the generated configuration
# mounted read-only. The container is recreated when the configuration file
# changed.
- name: run telegraf container
  docker_container:
    name: ed-telegraf
    image: ed-telegraf
    state: started
    detach: true
    # cleanup: True
    recreate: "{{ telegraf_config_gen.changed | default(False) | bool }}"
    hostname: telegraf
    domainname: "{{ cobald_domainname }}"
    networks_cli_compatible: true
    networks:
      - name: "{{ cobald_docker_network }}"
        aliases:
          - "ed-telegraf"
    volumes:
      - "/container/volumes/telegraf/telegraf.conf:/etc/telegraf/telegraf.conf:ro"
|
||||||
|
|
||||||
|
# Set up grafana, handing it the InfluxDB URL on the docker network.
- import_tasks: grafana.yml
  tags:
    - influxdb
  vars:
    influx_url: "http://{{cobald_influx_hostname}}:8086"
|
@@ -1,4 +1,4 @@
|
|||||||
FROM docker.io/library/centos:7
|
FROM {{ cobald_docker_base_image | default("docker.io/library/centos:7") }}
|
||||||
|
|
||||||
RUN yum update -y && \
|
RUN yum update -y && \
|
||||||
yum install -y python3 git && pip3 install --upgrade pip && \
|
yum install -y python3 git && pip3 install --upgrade pip && \
|
||||||
@@ -11,7 +11,9 @@ RUN git clone $REPOCOBALD /usr/local/src/cobald && \
|
|||||||
git clone $REPOTARDIS /usr/local/src/tardis
|
git clone $REPOTARDIS /usr/local/src/tardis
|
||||||
|
|
||||||
RUN mkdir /etc/cobald /var/log/cobald && \
|
RUN mkdir /etc/cobald /var/log/cobald && \
|
||||||
useradd -m -d /var/lib/cobald --no-log-init --system cobald
|
( getent passwd cobald > /dev/null || \
|
||||||
|
useradd -m -d /var/lib/cobald --no-log-init --system cobald ) && \
|
||||||
|
chown cobald:cobald /var/log/cobald
|
||||||
|
|
||||||
#RUN mkdir /cobald && python3 -m venv /cobald && source /cobald/bin/activate &&\
|
#RUN mkdir /cobald && python3 -m venv /cobald && source /cobald/bin/activate &&\
|
||||||
# pip3 install --upgrade pip && pip3 install cobald
|
# pip3 install --upgrade pip && pip3 install cobald
|
||||||
@@ -40,12 +42,32 @@ VOLUME /usr/local/src/cobaldmodules
|
|||||||
|
|
||||||
VOLUME /etc/cobald
|
VOLUME /etc/cobald
|
||||||
|
|
||||||
COPY cobald-entrypoint.sh /usr/local/sbin/docker-entrypoint.sh
|
RUN mkdir -p /usr/local/lib/entrypoints.d/
|
||||||
|
|
||||||
RUN chmod 755 /usr/local/sbin/docker-entrypoint.sh
|
COPY init-cobaldmodules.sh /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh
|
||||||
|
|
||||||
ENTRYPOINT /usr/local/sbin/docker-entrypoint.sh
|
RUN chmod 755 /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh
|
||||||
|
|
||||||
|
COPY start-drone /usr/local/bin/start-drone
|
||||||
|
COPY 28-sync-container-slurmd /etc/docker-init.d/28-sync-container-slurmd
|
||||||
|
RUN chmod 755 /usr/local/bin/start-drone /etc/docker-init.d/28-sync-container-slurmd
|
||||||
|
|
||||||
|
RUN echo -e "#!/bin/sh\npython3 -m cobald.daemon /etc/cobald/config.yaml" >> /etc/docker-init.d/70-cobald && chmod 755 /etc/docker-init.d/70-cobald
|
||||||
|
|
||||||
|
{% if cobald_docker_default_command | default(True) -%}
|
||||||
|
COPY cobald-entrypoint.sh /usr/local/sbin/cobald-entrypoint.sh
|
||||||
|
|
||||||
|
RUN chmod 755 /usr/local/sbin/cobald-entrypoint.sh
|
||||||
|
|
||||||
|
ENTRYPOINT [ "/usr/local/sbin/cobald-entrypoint.sh" ]
|
||||||
|
|
||||||
|
RUN yum -y install iproute &&\
|
||||||
|
yum clean all && rm -rf /var/cache/yum
|
||||||
|
|
||||||
USER cobald
|
USER cobald
|
||||||
|
|
||||||
CMD "python3 -m cobald.daemon /etc/cobald/config.yaml"
|
STOPSIGNAL SIGINT
|
||||||
|
|
||||||
|
# CMD "python3 -m cobald.daemon /etc/cobald/config.yaml"
|
||||||
|
# Default command: the cobald start script. It must name the script generated
# by the RUN step earlier in this file (70-cobald); no 60-cobald is created in
# the visible part of this Dockerfile.
CMD /etc/docker-init.d/70-cobald
|
||||||
|
{%- endif %}
|
1829
roles/cobald/templates/telegraf.conf.j2
Normal file
1829
roles/cobald/templates/telegraf.conf.j2
Normal file
File diff suppressed because it is too large
Load Diff
12
roles/cobald/vars/cobald-slurm.yml
Normal file
12
roles/cobald/vars/cobald-slurm.yml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Variable overrides for the slurm flavour of the cobald container (loaded by
# the role's include_vars when cobald_slurm is set). Image base, network,
# domain and mounts are all taken from the "slurm" dictionary.
cobald_image_tag: "slurm"
cobald_docker_default_command: false
cobald_docker_base_image: "{{slurm.base_image}}"
cobald_domainname: "{{slurm.domain}}"
cobald_docker_network: "{{slurm.network}}"
cobald_slurm_mounts: "{{slurm.mounts}}"
# - /container/volumes/slurm/:/etc/slurm/:rw
## - "{{slurm_cfg_path | mandatory}}:/etc/slurm/:rw"
# - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
## - "{{slurm_munge_path | mandatory}}:/etc/munge/munge.key:rw"
# - slurm-shared:/shared/:rw
## - "{{slurm_shared_path | mandatory}}:{{slurm_shared_target | default('/shared')}}:rw"
|
2
roles/cobald/vars/main.yml
Normal file
2
roles/cobald/vars/main.yml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# Hostname of the InfluxDB container on the docker network.
cobald_influx_hostname: 'ed-influxdb'
# Docker network of the cobald containers; follows the global docker_network.
cobald_docker_network: "{{docker_network}}"
|
@@ -8,7 +8,7 @@
|
|||||||
value: "15000"
|
value: "15000"
|
||||||
sysctl_file: /etc/sysctl.d/90-max_net_namespaces.conf
|
sysctl_file: /etc/sysctl.d/90-max_net_namespaces.conf
|
||||||
|
|
||||||
- name: "enable user thoto for fakeroot access"
|
- name: "enable user {{unpriv_user}} for fakeroot access"
|
||||||
lineinfile:
|
lineinfile:
|
||||||
line: "{{unpriv_user}}:4294836224:65536"
|
line: "{{unpriv_user}}:4294836224:65536"
|
||||||
dest: "{{item}}"
|
dest: "{{item}}"
|
@@ -1,5 +1,9 @@
|
|||||||
container_privileged: False
|
|
||||||
slurm_user: slurm
|
slurm_user: slurm
|
||||||
slurm_log_path_ctld: /var/log/slurm/slurmctld.log
|
slurm_log_path_ctld: /var/log/slurm/slurmctld.log
|
||||||
slurm_log_path_d: /var/log/slurm/slurmd.log
|
slurm_log_path_d: /var/log/slurm/slurmd.log
|
||||||
slurm_log_path_sched: /var/log/slurm/slurmsched.log
|
slurm_log_path_sched: /var/log/slurm/slurmsched.log
|
||||||
|
slurm_prefix: slurm
|
||||||
|
slurm_domain: slurm.local
|
||||||
|
container_privileged: False
|
||||||
|
docker_network: slurm
|
||||||
|
slurm_image_prefix: slurm
|
||||||
|
18
roles/slurm/files/docker-init
Normal file
18
roles/slurm/files/docker-init
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/bin/bash
# Minimal container init: start every script in /etc/docker-init.d/ in the
# background, wait for all of them, and forward SIGINT/SIGTERM to them.

pids=""

# Signal handler: terminate the children of every start script, then the
# start script itself.
trp_term() {
	echo pkill -P $pids
	for j in $pids ; do
		pkill -P "$j"
		kill -SIGTERM "$j"
	done
}
trap trp_term SIGINT SIGTERM

for i in /etc/docker-init.d/* ; do
	# Skip anything that is not a regular file (including the unexpanded
	# glob when the directory is empty) without dropping the scripts that
	# sort after it.
	[ -f "$i" ] || continue
	"$i" &
	pids="$pids $!"
done

# $pids is left unquoted on purpose: it is a space separated list.
wait $pids
status=$?

# A trapped signal makes `wait` return early with a status above 128. Wait
# once more so the services get time to shut down before this script exits;
# if the children are already gone this returns immediately.
if [ "$status" -gt 128 ] ; then
	wait $pids
fi

exit "$status"
|
||||||
|
|
||||||
|
# TODO: call start scripts like "foo.sh start" and "foo.sh stop" to avoid pkill
|
7
roles/slurm/files/entry-munge.sh
Normal file
7
roles/slurm/files/entry-munge.sh
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
#!/usr/bin/env bash
# Entrypoint hook: if a munge key is present, give it to the munge user and
# restrict it to owner read/write. Does nothing when no key exists.
set -e

munge_key="/etc/munge/munge.key"

if [ -f "$munge_key" ] ; then
	chown munge:munge "$munge_key"
	chmod 600 "$munge_key"
fi
|
@@ -1,9 +1,8 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
if [ -f "/etc/munge/munge.key" ] ; then
|
for i in /usr/local/lib/entrypoints.d/* ; do
|
||||||
chown munge:munge /etc/munge/munge.key
|
[ -f $i ] && /bin/sh $i || break
|
||||||
chmod 600 /etc/munge/munge.key
|
done
|
||||||
fi
|
|
||||||
|
|
||||||
exec "$@"
|
exec "${@:-/bin/bash}"
|
||||||
|
35
roles/slurm/files/slurm-base.Dockerfile
Normal file
35
roles/slurm/files/slurm-base.Dockerfile
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Base image shared by the slurm containers: CentOS 7 with slurm from EPEL,
# some network debugging tools, the entrypoint hook mechanism
# (/usr/local/lib/entrypoints.d) and the docker-init service launcher
# (/etc/docker-init.d).
FROM docker.io/library/centos:7

RUN yum install -y epel-release && \
    yum install -y slurm && \
    yum clean all && rm -rf /var/cache/yum

RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
    yum clean all && rm -rf /var/cache/yum

# Hooks placed in this directory are run by entrypoint.sh before the command.
RUN mkdir -p /usr/local/lib/entrypoints.d/

COPY --chown=root:root entry-munge.sh /usr/local/lib/entrypoints.d/10-munge.sh
COPY --chown=root:root entrypoint.sh /usr/local/sbin/entrypoint.sh

RUN chmod 755 /usr/local/lib/entrypoints.d/10-munge.sh && \
    chmod 755 /usr/local/sbin/entrypoint.sh

ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]

# The slurm account name is a build argument and is also exported to the
# running container, where the start scripts read it.
ARG slurmuser=slurm
ENV slurmuser=${slurmuser}

RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
    slurm-setuser -u $slurmuser -g $slurmuser -y

# docker-init starts every script found in /etc/docker-init.d.
COPY docker-init /usr/local/sbin/docker-init
RUN mkdir /etc/docker-init.d && chmod 755 /usr/local/sbin/docker-init
COPY start-scripts/10-munge /etc/docker-init.d/10-munge
RUN chmod 755 /etc/docker-init.d/10-munge

# Optional extra system accounts, passed as comma separated "name homedir"
# pairs; an empty value creates nothing.
ARG moreusers
RUN function mu { [ -z "$1" ] || useradd -d $2 -m --no-log-init --system $1 ;};\
    echo "${moreusers}" | tr ',' '\n' | while read i ; do mu $i ; done

# Exec form: the entrypoint receives docker-init itself as its command rather
# than a "/bin/sh -c" wrapper, so docker-init is exec'd directly and its
# SIGINT/SIGTERM traps receive the container's stop signal.
CMD [ "/usr/local/sbin/docker-init" ]
|
@@ -1,43 +0,0 @@
|
|||||||
FROM docker.io/library/centos:7 as base
|
|
||||||
|
|
||||||
RUN yum install -y epel-release && \
|
|
||||||
yum install -y slurm && \
|
|
||||||
yum clean all && rm -rf /var/cache/yum
|
|
||||||
|
|
||||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
|
||||||
yum clean all && rm -rf /var/cache/yum
|
|
||||||
|
|
||||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
|
||||||
|
|
||||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
|
||||||
chmod 755 /usr/local/sbin/entrypoint.sh
|
|
||||||
|
|
||||||
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
|
|
||||||
|
|
||||||
ARG slurmuser=slurm
|
|
||||||
ENV slurmuser=${slurmuser}
|
|
||||||
|
|
||||||
RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
|
||||||
slurm-setuser -u $slurmuser -g $slurmuser -y
|
|
||||||
|
|
||||||
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
|
||||||
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
|
||||||
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
|
||||||
|
|
||||||
FROM base as slurmd
|
|
||||||
|
|
||||||
RUN yum install -y slurm-slurmd && \
|
|
||||||
yum clean all && rm -rf /var/cache/yum
|
|
||||||
|
|
||||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
|
||||||
slurmd -D 2>/dev/null 1>/dev/null & \
|
|
||||||
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
|
||||||
|
|
||||||
FROM base as slurmctld
|
|
||||||
|
|
||||||
RUN yum install -y slurm-slurmctld && \
|
|
||||||
yum clean all && rm -rf /var/cache/yum
|
|
||||||
|
|
||||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
|
||||||
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
|
|
||||||
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
|
@@ -1,32 +1,11 @@
|
|||||||
FROM docker.io/library/centos:7 as base
|
FROM slurm:base
|
||||||
|
|
||||||
RUN yum install -y epel-release && \
|
|
||||||
yum install -y slurm && \
|
|
||||||
yum clean all && rm -rf /var/cache/yum
|
|
||||||
|
|
||||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
|
||||||
yum clean all && rm -rf /var/cache/yum
|
|
||||||
|
|
||||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
|
||||||
|
|
||||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
|
||||||
chmod 755 /usr/local/sbin/entrypoint.sh
|
|
||||||
|
|
||||||
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
|
|
||||||
|
|
||||||
ARG slurmuser=slurm
|
|
||||||
ENV slurmuser=${slurmuser}
|
|
||||||
|
|
||||||
RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
|
||||||
slurm-setuser -u $slurmuser -g $slurmuser -y
|
|
||||||
|
|
||||||
RUN yum install -y slurm-slurmctld && \
|
RUN yum install -y slurm-slurmctld && \
|
||||||
yum clean all && rm -rf /var/cache/yum
|
yum clean all && rm -rf /var/cache/yum
|
||||||
|
|
||||||
|
COPY start-scripts/20-slurmctld /etc/docker-init.d/20-slurmctld
|
||||||
|
RUN chmod 755 /etc/docker-init.d/20-slurmctld
|
||||||
|
|
||||||
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
||||||
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
||||||
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
||||||
|
|
||||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
|
||||||
su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
|
|
||||||
tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
|
||||||
|
@@ -1,32 +1,14 @@
|
|||||||
FROM docker.io/library/centos:7
|
FROM slurm:base
|
||||||
|
|
||||||
RUN yum install -y epel-release && \
|
|
||||||
yum install -y slurm && \
|
|
||||||
yum clean all && rm -rf /var/cache/yum
|
|
||||||
|
|
||||||
RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
|
|
||||||
yum clean all && rm -rf /var/cache/yum
|
|
||||||
|
|
||||||
COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
|
|
||||||
|
|
||||||
RUN chown root:root /usr/local/sbin/entrypoint.sh && \
|
|
||||||
chmod 755 /usr/local/sbin/entrypoint.sh
|
|
||||||
|
|
||||||
ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
|
|
||||||
|
|
||||||
ARG slurmuser=slurm
|
|
||||||
ENV slurmuser=${slurmuser}
|
|
||||||
|
|
||||||
RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
|
|
||||||
slurm-setuser -u $slurmuser -g $slurmuser -y
|
|
||||||
|
|
||||||
RUN yum install -y slurm-slurmd && \
|
RUN yum install -y slurm-slurmd && \
|
||||||
yum clean all && rm -rf /var/cache/yum
|
yum clean all && rm -rf /var/cache/yum
|
||||||
|
|
||||||
|
COPY start-scripts/30-slurmd /etc/docker-init.d/30-slurmd
|
||||||
|
RUN chmod 755 /etc/docker-init.d/30-slurmd
|
||||||
|
|
||||||
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
|
||||||
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
|
||||||
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
|
||||||
|
|
||||||
CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
|
RUN yum install -y singularity && \
|
||||||
slurmd -D 2>/dev/null 1>/dev/null & \
|
yum clean all && rm -rf /var/cache/yum
|
||||||
tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
|
|
||||||
|
2
roles/slurm/files/start-scripts/10-munge
Normal file
2
roles/slurm/files/start-scripts/10-munge
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
#!/bin/sh
# Run munged in the foreground as the munge user; exec replaces this shell
# with the su process.
exec su --shell /bin/sh --command "munged -F" munge
|
4
roles/slurm/files/start-scripts/20-slurmctld
Normal file
4
roles/slurm/files/start-scripts/20-slurmctld
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
# Start slurmctld (-D) as the slurm user in the background with its own
# output discarded, then follow the log files. tail ends when the background
# job exits (--pid).
su -s /bin/sh -c "slurmctld -D" ${slurmuser} >/dev/null 2>&1 &
daemon_pid=$!
tail --retry --pid ${daemon_pid} -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
|
||||||
|
|
4
roles/slurm/files/start-scripts/30-slurmd
Normal file
4
roles/slurm/files/start-scripts/30-slurmd
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
# Start slurmd (-D) in the background with its own output discarded, then
# follow the log files. tail ends when slurmd exits (--pid).
slurmd -D >/dev/null 2>&1 &
daemon_pid=$!
tail --retry --pid ${daemon_pid} -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH}
|
||||||
|
|
@@ -1,3 +1,3 @@
|
|||||||
- name: reconfigure slurm
|
- name: reconfigure slurm
|
||||||
command:
|
shell:
|
||||||
cmd: docker container exec -it slurm-ctl scontrol reconfigure
|
cmd: "docker container exec -it {{slurm_prefix}}-ctl scontrol reconfigure || docker container restart {{slurm_prefix}}-ctl && docker container exec -it {{slurm_prefix}}-ctl scontrol reconfigure"
|
||||||
|
@@ -1,31 +1,29 @@
|
|||||||
- file:
|
- name: build slurm base docker image
|
||||||
path: "/container/docker-images/{{item}}"
|
include_tasks: dockerimage_build.yml
|
||||||
state: directory
|
vars:
|
||||||
owner: "{{unpriv_user}}"
|
slurm_image_prefix: "{{slurm_base_image_prefix | default('slurm') }}"
|
||||||
group: docker
|
image_name: base
|
||||||
|
dockerfile: "{{lookup('file', 'slurm-base.Dockerfile')}}"
|
||||||
|
files:
|
||||||
|
- dest: entrypoint.sh
|
||||||
|
content: "{{ lookup('file', 'entrypoint.sh') }}"
|
||||||
|
- dest: entry-munge.sh
|
||||||
|
content: "{{ lookup('file', 'entry-munge.sh') }}"
|
||||||
|
- dest: docker-init
|
||||||
|
content: "{{ lookup('file', 'docker-init') }}"
|
||||||
|
- dest: start-scripts/10-munge
|
||||||
|
content: "{{ lookup('file', 'start-scripts/10-munge') }}"
|
||||||
|
image_args:
|
||||||
|
moreusers: >-
|
||||||
|
{% for a in slurm_user_accounts | default([]) -%}
|
||||||
|
{{a['name']}} {{a['dir']}}{{loop.last | ternary('',',')}}
|
||||||
|
{%- endfor %}
|
||||||
|
when: not slurm_baseimg_build_chg | default(False)
|
||||||
|
|
||||||
- copy:
|
- set_fact:
|
||||||
src: "{{item}}.Dockerfile"
|
slurm_baseimg_build_chg:
|
||||||
dest: "/container/docker-images/{{item}}/Dockerfile"
|
"{{(slurm_baseimg_build_chg | default(False)) or
|
||||||
owner: "{{unpriv_user}}"
|
slurm_img_build.changed}}"
|
||||||
group: docker
|
|
||||||
register: slurm_cp_dockerfile
|
|
||||||
|
|
||||||
- copy:
|
|
||||||
src: "entrypoint.sh"
|
|
||||||
dest: "/container/docker-images/{{item}}/entrypoint.sh"
|
|
||||||
owner: root
|
|
||||||
group: root
|
|
||||||
mode: u=rwx,g=rx,o=rx
|
|
||||||
register: slurm_cp_entrypt
|
|
||||||
|
|
||||||
- docker_image:
|
|
||||||
name: "slurm-{{item}}"
|
|
||||||
# pull: False
|
|
||||||
build:
|
|
||||||
pull: False
|
|
||||||
path: "/container/docker-images/{{item}}"
|
|
||||||
# target: "{{item}}" # unsupported on old docker-py versions as in el7
|
|
||||||
source: build
|
|
||||||
force_source: "{{slurm_cp_dockerfile.changed or slurm_cp_entrypt.changed}}"
|
|
||||||
|
|
||||||
|
- name: "build slurm base docker image {{image_name}}"
|
||||||
|
include_tasks: dockerimage_build.yml
|
||||||
|
43
roles/slurm/tasks/dockerimage_build.yml
Normal file
43
roles/slurm/tasks/dockerimage_build.yml
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
- name: create directories for docker image build
|
||||||
|
file:
|
||||||
|
path: "/container/docker-images/{{slurm_image_prefix}}-{{image_name}}/{{item}}"
|
||||||
|
state: directory
|
||||||
|
owner: "{{unpriv_user}}"
|
||||||
|
group: docker
|
||||||
|
loop: "{{ [''] + (files | map(attribute='dest') | map('dirname') |
|
||||||
|
unique | select | list) }}"
|
||||||
|
|
||||||
|
- name: "copy Dockerfile {{slurm_image_prefix}}:{{image_name}}"
|
||||||
|
copy:
|
||||||
|
content: "{{dockerfile}}"
|
||||||
|
dest: "/container/docker-images/{{slurm_image_prefix}}-{{image_name}}/Dockerfile"
|
||||||
|
owner: "{{unpriv_user}}"
|
||||||
|
group: docker
|
||||||
|
register: slurm_cp_dockerfile
|
||||||
|
|
||||||
|
- name: copy requisite files
|
||||||
|
copy:
|
||||||
|
content: "{{ item.content }}"
|
||||||
|
dest: "/container/docker-images/{{slurm_image_prefix}}-{{image_name}}/{{item.dest}}"
|
||||||
|
owner: root
|
||||||
|
group: root
|
||||||
|
mode: u=rwx,g=rx,o=rx
|
||||||
|
loop: "{{ files | default([]) }}"
|
||||||
|
loop_control:
|
||||||
|
label: "{{ item.dest }}"
|
||||||
|
register: slurm_cp_files
|
||||||
|
|
||||||
|
- name: "build docker image {{slurm_image_prefix}}:{{image_name}}"
|
||||||
|
docker_image:
|
||||||
|
name: "{{slurm_image_prefix}}"
|
||||||
|
tag: "{{image_name}}"
|
||||||
|
# pull: False
|
||||||
|
build:
|
||||||
|
args: "{{image_args | default(omit)}}"
|
||||||
|
pull: False
|
||||||
|
path: "/container/docker-images/{{slurm_image_prefix}}-{{image_name}}/"
|
||||||
|
source: build
|
||||||
|
force_source: "{{slurm_cp_dockerfile.changed or
|
||||||
|
slurm_cp_files.changed or
|
||||||
|
slurm_baseimg_build_chg | default(False) }}"
|
||||||
|
register: slurm_img_build
|
35
roles/slurm/tasks/host-config.yml
Normal file
35
roles/slurm/tasks/host-config.yml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# TODO: this does not work quite right since slurm-ctl does not reach the host
|
||||||
|
# system. sinfo, scontrol etc. work but srun does not!
|
||||||
|
|
||||||
|
- name: "get addresses from docker network"
|
||||||
|
docker_network_info:
|
||||||
|
name: "{{ docker_network }}"
|
||||||
|
register: slurm_network_data
|
||||||
|
|
||||||
|
- name: link host slurm config
|
||||||
|
file:
|
||||||
|
path: "/etc/slurm/slurm.conf"
|
||||||
|
src: "/container/volumes/slurm/slurm.conf"
|
||||||
|
force: True
|
||||||
|
state: link
|
||||||
|
backup: True
|
||||||
|
|
||||||
|
- name: create slurm user
|
||||||
|
user:
|
||||||
|
name: slurm
|
||||||
|
system: True
|
||||||
|
|
||||||
|
- name: place entry of slurm-ctl in host /etc/hosts
|
||||||
|
lineinfile:
|
||||||
|
line: "{{slurm_network_data.network.Containers | dict2items
|
||||||
|
| json_query('[?value.Name==`slurm-ctl`].value.IPv4Address') | first
|
||||||
|
| ipaddr('address') }}\tslurm-ctl"
|
||||||
|
regexp: "^(\\S*)(\\s*)slurm-ctl$"
|
||||||
|
path: /etc/hosts
|
||||||
|
backup: True
|
||||||
|
|
||||||
|
- name: start munge locally
|
||||||
|
service:
|
||||||
|
name: munge
|
||||||
|
enabled: True
|
||||||
|
state: started
|
@@ -3,10 +3,27 @@
|
|||||||
name: [ slurm, slurm-doc ]
|
name: [ slurm, slurm-doc ]
|
||||||
state: present
|
state: present
|
||||||
|
|
||||||
- include_tasks: dockerimage.yml
|
- name: build docker images for slurm
|
||||||
|
include_tasks:
|
||||||
|
file: dockerimage.yml
|
||||||
loop:
|
loop:
|
||||||
- slurmctld
|
- name: slurmctld
|
||||||
- slurmd
|
dockerfile: "{{ lookup('file', 'slurmctld.Dockerfile') }}"
|
||||||
|
files:
|
||||||
|
- dest: start-scripts/20-slurmctld
|
||||||
|
content: "{{ lookup('file', 'start-scripts/20-slurmctld') }}"
|
||||||
|
- name: slurmd
|
||||||
|
dockerfile: "{{ lookup('file', 'slurmd.Dockerfile') }}"
|
||||||
|
files:
|
||||||
|
- dest: start-scripts/30-slurmd
|
||||||
|
content: "{{ lookup('file', 'start-scripts/30-slurmd') }}"
|
||||||
|
vars:
|
||||||
|
image_name: "{{image.name | default(omit) }}"
|
||||||
|
dockerfile: "{{image.dockerfile | default(omit) }}"
|
||||||
|
files: "{{image.files | default(omit) }}"
|
||||||
|
loop_control:
|
||||||
|
loop_var: image
|
||||||
|
label: "{{ image.name }}"
|
||||||
|
|
||||||
- name: generate munge key
|
- name: generate munge key
|
||||||
shell:
|
shell:
|
||||||
@@ -20,7 +37,8 @@
|
|||||||
group: munge
|
group: munge
|
||||||
mode: u=rw,g=,o=
|
mode: u=rw,g=,o=
|
||||||
|
|
||||||
- file:
|
- name: create munge key directory for containers
|
||||||
|
file:
|
||||||
path: /container/volumes/munge
|
path: /container/volumes/munge
|
||||||
state: directory
|
state: directory
|
||||||
owner: munge
|
owner: munge
|
||||||
@@ -35,10 +53,18 @@
|
|||||||
src: /etc/munge/munge.key
|
src: /etc/munge/munge.key
|
||||||
dest: /container/volumes/munge/munge.key
|
dest: /container/volumes/munge/munge.key
|
||||||
|
|
||||||
- file:
|
- name: make slurm directory
|
||||||
|
file:
|
||||||
path: /container/volumes/slurm/
|
path: /container/volumes/slurm/
|
||||||
state: directory
|
state: directory
|
||||||
|
|
||||||
|
- name: "create docker network to make service discovery work"
|
||||||
|
docker_network:
|
||||||
|
name: "{{ docker_network }}"
|
||||||
|
state: present
|
||||||
|
register: slurm_network_data
|
||||||
|
tags: slurm-config
|
||||||
|
|
||||||
- name: upload slurm config
|
- name: upload slurm config
|
||||||
template:
|
template:
|
||||||
force: true
|
force: true
|
||||||
@@ -47,59 +73,66 @@
|
|||||||
loop:
|
loop:
|
||||||
- slurm.conf
|
- slurm.conf
|
||||||
- cgroup.conf
|
- cgroup.conf
|
||||||
|
vars:
|
||||||
|
slurm_exec_node_cores: 3
|
||||||
|
slurm_exec_node_mem: 5000 # RealMemory=5964
|
||||||
|
slurm_alloc_nodes_default:
|
||||||
|
- name: "{{slurm_prefix+'-submit1'}}"
|
||||||
|
- name: "{{ inventory_hostname }}"
|
||||||
|
addr: "{{ slurm_network_data.network.IPAM.Config[0].Gateway }}"
|
||||||
|
alloc_nodes: "{{ slurm_alloc_nodes_default + extra_nodes | default([])}}"
|
||||||
|
partitions:
|
||||||
|
- name: cobald
|
||||||
|
nodeprefix: drone
|
||||||
|
num_nodes: 10
|
||||||
|
node_cores: 3
|
||||||
|
node_mem: 4900
|
||||||
|
port: 16818
|
||||||
|
initstate: FUTURE
|
||||||
notify: reconfigure slurm
|
notify: reconfigure slurm
|
||||||
tags: [ slurm-config ]
|
tags: slurm-config
|
||||||
|
|
||||||
- name: "create docker network to make service discovery work"
|
|
||||||
docker_network:
|
|
||||||
name: slurm
|
|
||||||
state: present
|
|
||||||
|
|
||||||
- name: "create docker volume for shared access between nodes"
|
- name: "create docker volume for shared access between nodes"
|
||||||
docker_volume:
|
docker_volume:
|
||||||
name: slurm-shared
|
name: slurm-shared
|
||||||
state: present
|
state: present
|
||||||
|
|
||||||
- set_fact:
|
|
||||||
slurm_nodes: # default nodes: controller and submit machine
|
|
||||||
- machine: ctl
|
|
||||||
image: slurm-slurmctld
|
|
||||||
- machine: submit1
|
|
||||||
image: slurm-slurmd
|
|
||||||
extra_mounts:
|
|
||||||
- "/home/{{unpriv_user}}/job3/:/mnt/:rw"
|
|
||||||
tags: [ slurm-config ]
|
|
||||||
|
|
||||||
# TODO: reserve some address using docker_network_info and assign as aux
|
|
||||||
# address to enable slurmctld to get a static address in order to be
|
|
||||||
# reachable from slurm running on docker host to enable submitting jobs.
|
|
||||||
|
|
||||||
- name: run slurm docker containers
|
- name: run slurm docker containers
|
||||||
docker_container:
|
docker_container:
|
||||||
name: "slurm-{{item.machine}}"
|
name: "{{ slurm_prefix }}-{{ item.machine }}"
|
||||||
hostname: "slurm-{{item.machine}}"
|
hostname: "{{ slurm_prefix }}-{{ item.machine }}"
|
||||||
domainname: "slurm.local"
|
domainname: "{{ slurm_domain }}"
|
||||||
volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
|
volumes: "{{ slurm_default_mounts + ( item.extra_mounts | default([]) ) }}"
|
||||||
|
ports: "{{ item.exposed_ports | default([]) }}"
|
||||||
networks:
|
networks:
|
||||||
- name: "slurm"
|
- name: "{{ docker_network }}"
|
||||||
|
aliases: "{{ item.aliases | default(omit) }}"
|
||||||
env:
|
env:
|
||||||
slurmuser: "{{slurm_user}}"
|
slurmuser: "{{ slurm_user }}"
|
||||||
image: "{{item.image}}"
|
image: "{{ item.image }}"
|
||||||
state: started
|
state: started
|
||||||
detach: True
|
detach: True
|
||||||
cleanup: True
|
cleanup: True
|
||||||
privileged: "{{ container_privileged | bool }}"
|
privileged: "{{ container_privileged | bool }}"
|
||||||
networks_cli_compatible: True
|
networks_cli_compatible: True
|
||||||
vars:
|
interactive: True
|
||||||
default_mounts:
|
vars: # see vars/main.yml
|
||||||
- /container/volumes/slurm/:/etc/slurm/:rw
|
slurm_nodes_all: "{{ slurm_nodes_exec + slurm_nodes_std }}"
|
||||||
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
loop: "{{ slurm_nodes_all }}"
|
||||||
- slurm-shared:/shared/:rw
|
loop_control:
|
||||||
slurm_nodes_all: | # add execute nodes
|
label: "{{slurm_prefix}}-{{ item.machine }}"
|
||||||
{% for i in range(1, 4) -%}
|
tags: slurm-config
|
||||||
{% set _ = slurm_nodes.extend([
|
|
||||||
{'machine':'exec%s'|format(i), 'image': 'slurm-slurmd'}]) -%}
|
- name: configure host system to integrate into slurm cluster
|
||||||
{%- endfor %}
|
import_tasks: host-config.yml
|
||||||
{{ slurm_nodes }}
|
when: slurm_hostsystem_cluster_access | default(False)
|
||||||
loop: "{{slurm_nodes_all}}"
|
|
||||||
tags: [ slurm-config ]
|
- name: export facts about slurm cluster to be used by other modules
|
||||||
|
set_fact:
|
||||||
|
slurm:
|
||||||
|
user: "{{slurm_user}}"
|
||||||
|
domain: "{{slurm_domain}}"
|
||||||
|
base_image: "slurm:base"
|
||||||
|
mounts: "{{slurm_default_mounts}}"
|
||||||
|
network: "{{docker_network}}"
|
||||||
|
tags: always
|
||||||
|
@@ -9,6 +9,8 @@ ControlMachine=slurm-ctl
|
|||||||
AuthType=auth/munge
|
AuthType=auth/munge
|
||||||
#CheckpointType=checkpoint/none
|
#CheckpointType=checkpoint/none
|
||||||
CryptoType=crypto/munge
|
CryptoType=crypto/munge
|
||||||
|
CommunicationParameters=NoAddrCache
|
||||||
|
SlurmctldParameters=enable_configless
|
||||||
#DisableRootJobs=NO
|
#DisableRootJobs=NO
|
||||||
#EnforcePartLimits=NO
|
#EnforcePartLimits=NO
|
||||||
#Epilog=
|
#Epilog=
|
||||||
@@ -103,8 +105,10 @@ Waittime=0
|
|||||||
#MaxMemPerCPU=0
|
#MaxMemPerCPU=0
|
||||||
#SchedulerTimeSlice=30
|
#SchedulerTimeSlice=30
|
||||||
SchedulerType=sched/backfill
|
SchedulerType=sched/backfill
|
||||||
SelectType=select/linear
|
# SelectType=select/linear
|
||||||
|
SelectType=select/cons_res
|
||||||
#SelectTypeParameters=
|
#SelectTypeParameters=
|
||||||
|
SelectTypeParameters=CR_CORE
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# JOB PRIORITY
|
# JOB PRIORITY
|
||||||
@@ -163,6 +167,27 @@ SlurmSchedLogFile={{slurm_log_path_sched}}
|
|||||||
#
|
#
|
||||||
#
|
#
|
||||||
# COMPUTE NODES
|
# COMPUTE NODES
|
||||||
NodeName=slurm-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN
|
NodeName=slurm-exec[1-{{ num_nodes }}] CPUs={{ slurm_exec_node_cores }} {{''
|
||||||
NodeName=slurm-submit1 CPUs=1 State=UNKNOWN
|
}} RealMemory={{ slurm_exec_node_mem }} {{''
|
||||||
PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] AllocNodes=slurm-submit1 Default=YES MaxTime=INFINITE State=UP
|
}} CoresPerSocket={{ slurm_exec_node_cores }} State=UNKNOWN
|
||||||
|
{% for p in partitions | default([]) %}
|
||||||
|
NodeName={{ p.nodeprefix }}[1-{{ p.num_nodes }}] CPUs={{ p.node_cores }} {{''
|
||||||
|
}} RealMemory={{ p.node_mem }} {{''
|
||||||
|
}} CoresPerSocket={{ p.node_cores }} {{''
|
||||||
|
}} {%- if p.port is defined %} Port={{ p.port}} {% endif %}{{''
|
||||||
|
}} State={{ p.initstate | default('UNKNOWN') }}
|
||||||
|
{% endfor %}
|
||||||
|
{% for i in alloc_nodes -%}
|
||||||
|
NodeName={{i.name}}
|
||||||
|
{%- if i.hostname is defined %} NodeHostname={{i.hostname}} {% endif %}
|
||||||
|
{%- if i.addr is defined %} NodeAddr={{i.addr}} {% endif %}
|
||||||
|
State=UNKNOWN
|
||||||
|
{% endfor %}
|
||||||
|
PartitionName=container Nodes=slurm-exec[1-{{num_nodes}}] {{ ''
|
||||||
|
}} AllocNodes={{alloc_nodes |map(attribute='name') | join(',')}} {{ ''
|
||||||
|
}} Default=YES MaxTime=INFINITE State=UP
|
||||||
|
{% for p in partitions | default([]) %}
|
||||||
|
PartitionName={{ p.name }} Nodes={{ p.nodeprefix }}[1-{{ p.num_nodes }}] {{ ''
|
||||||
|
}} AllocNodes={{alloc_nodes |map(attribute='name') | join(',')}} {{ ''
|
||||||
|
}} MaxTime=INFINITE State=UP
|
||||||
|
{% endfor %}
|
||||||
|
21
roles/slurm/vars/main.yml
Normal file
21
roles/slurm/vars/main.yml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
slurm_nodes_std: # default nodes: controller and submit machine
|
||||||
|
- machine: ctl
|
||||||
|
image: slurm:slurmctld
|
||||||
|
exposed_ports: [ "6817:6817/tcp" ]
|
||||||
|
- machine: submit1
|
||||||
|
image: slurm:slurmd
|
||||||
|
extra_mounts:
|
||||||
|
- "/home/{{unpriv_user}}/job3/:/mnt/:rw"
|
||||||
|
slurm_nodes_exec: | # extend range to execute nodes list
|
||||||
|
{% set slurm_nodes_exec = [] %}
|
||||||
|
{% for i in range(1, num_nodes+1) -%}
|
||||||
|
{% set _ = slurm_nodes_exec.extend([
|
||||||
|
{'machine':'exec%s'|format(i), 'image': 'slurm:slurmd',
|
||||||
|
'aliases':['drone%s'|format(i)]}]) -%}
|
||||||
|
{%- endfor %}
|
||||||
|
{{ slurm_nodes_exec }}
|
||||||
|
slurm_default_mounts:
|
||||||
|
- /container/volumes/slurm/:/etc/slurm/:rw
|
||||||
|
- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
|
||||||
|
- slurm-shared:/shared/:rw
|
||||||
|
|
Reference in New Issue
Block a user