diff --git a/play.yml b/play.yml index b56b6a0..e363563 100644 --- a/play.yml +++ b/play.yml @@ -1,6 +1,8 @@ --- - hosts: all - vars_files: ['vars-auth.yml'] + vars_files: + - vars-auth.yml + - vars-influx.yml tasks: - name: "install epel repo" # for htop etc. yum: @@ -61,16 +63,26 @@ - name: "setup docker" role: docker tags: docker + - name: "get facts from existing cobald instance (i.e. hostname)" + role: cobald_facts + vars: + container_name: cobald + tags: [ slurm, cobald ] - name: "setup slurm test environment in docker containers" role: slurm vars: slurm_user: slurm # or root num_nodes: "{{slurm_num_nodes}}" + extra_nodes: + - "{{cobald_container_hostname}}" # from cobald_facts, read or generated + docker_network: slurm when: '"slurm" in group_names' tags: slurm - name: "install cobald" role: cobald vars: - docker_network: slurm + cobald_slurm: True + container_name: cobald + # docker_network: slurm # overridden by cobald_docker_network in roles/cobald/vars/cobald-slurm.yml when: '"cobald" in group_names' tags: cobald diff --git a/roles/cobald/files/cobald-entrypoint.sh b/roles/cobald/files/cobald-entrypoint.sh index 4bbae50..91e6c9e 100644 --- a/roles/cobald/files/cobald-entrypoint.sh +++ b/roles/cobald/files/cobald-entrypoint.sh @@ -1,7 +1,7 @@ #!/bin/sh -[ -f /usr/local/lib/cobaldmodules/setup.py -a \ - -d /usr/local/lib/cobaldmodules/cobaldmodules ] && \ - pip3 install --no-deps --editable /usr/local/lib/cobaldmodules +for i in /usr/local/lib/entrypoints.d/* ; do + [ -f "$i" ] && /bin/sh "$i" || break +done exec "${@:-/bin/bash}" diff --git a/roles/cobald/files/init-cobaldmodules.sh b/roles/cobald/files/init-cobaldmodules.sh new file mode 100644 index 0000000..9930042 --- /dev/null +++ b/roles/cobald/files/init-cobaldmodules.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Exit 0 when there is nothing to install: the entrypoint loop stops at the first script that fails. +if [ -f /usr/local/lib/cobaldmodules/setup.py ] && [ -d /usr/local/lib/cobaldmodules/cobaldmodules ] ; then + pip3 install --no-deps --editable /usr/local/lib/cobaldmodules +fi diff --git a/roles/cobald/tasks/main.yml b/roles/cobald/tasks/main.yml index 
5008c85..7c5a3f9 100644 --- a/roles/cobald/tasks/main.yml +++ b/roles/cobald/tasks/main.yml @@ -1,33 +1,41 @@ +- include_vars: cobald-slurm.yml + when: cobald_slurm | default(False) + - file: - path: "/container/{{item}}/cobald/" + path: "/container/{{item.name}}/cobald{{item.pfx|default('')}}/" state: directory owner: "{{unpriv_user}}" group: docker loop: - - docker-images - - volumes + - name: docker-images + pfx: ".{{cobald_image_tag|default('latest')}}" + - name: volumes -- copy: +- template: src: cobald.Dockerfile - dest: /container/docker-images/cobald/Dockerfile + dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/Dockerfile" owner: "{{unpriv_user}}" group: docker register: cobald_cp_dockerfile - copy: - src: cobald-entrypoint.sh - dest: /container/docker-images/cobald/cobald-entrypoint.sh + src: "{{item}}" + dest: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/{{item}}" owner: "{{unpriv_user}}" group: docker mode: 0755 + loop: + - cobald-entrypoint.sh + - init-cobaldmodules.sh register: cobald_cp_files - docker_image: name: "cobald" + tag: "{{cobald_image_tag|default('latest')}}" # pull: False build: pull: False - path: "/container/docker-images/cobald/" + path: "/container/docker-images/cobald.{{cobald_image_tag|default('latest')}}/" source: build force_source: "{{cobald_cp_dockerfile.changed or cobald_cp_files.changed}}" @@ -39,7 +47,7 @@ when: False - docker_network: - name: "{{docker_network}}" + name: "{{cobald_docker_network}}" # FIXME state: present # docker run -v $(pwd)/cobald-config-host:/etc/cobald -v $(pwd)/cobald:/cobald --rm -it cobald bash @@ -82,7 +90,7 @@ - name: run pip install docker_container: - image: cobald + image: "cobald:{{cobald_image_tag|default('latest')}}" name: "cobald-src-{{item.name}}-install" volumes: - "~{{unpriv_user}}/{{item.name}}-src:/usr/local/src/{{item.name}}:rw" @@ -102,29 +110,34 @@ - import_tasks: telegraf.yml -- docker_container_info: - name: cobald - 
register: cobald_container_info +- name: get cobald hostname + include_role: + name: cobald_facts + when: cobald_container_hostname is not defined -- docker_container: - name: cobald - image: cobald - hostname: |- - {{cobald_container_info.container.Config.Hostname | default('cobald-'+ - lookup('password', '/dev/null chars=ascii_lowercase length=6')) }} - domainname: cobald.local - volumes: +- name: run cobald container + docker_container: + name: "{{ container_name | default('cobald') }}" + image: "cobald:{{cobald_image_tag|default('latest')}}" + hostname: "{{cobald_container_hostname}}" + domainname: "{{ cobald_domainname | default('cobald.local')}}" + volumes: "{{default_mounts + cobald_mounts }}" + networks: + - name: "{{cobald_docker_network}}" + networks_cli_compatible: True +# env: +# slurmuser: "{{slurm_user}}" +# privileged: "{{ container_privileged | bool }}" + state: started + detach: True + cleanup: True + interactive: True + # command: python3 -m cobald.daemon /etc/cobald/config.yaml + vars: + default_mounts: "{{cobald_slurm_mounts | default([])}}" + cobald_mounts: - "~{{unpriv_user}}/cobald:/etc/cobald" # - /container/volumes/cobald:/etc/cobald:ro - "~{{unpriv_user}}/cobald/modules:/usr/local/src/cobaldmodules" - "~{{unpriv_user}}/cobald-src:/usr/local/src/cobald:ro" - "~{{unpriv_user}}/tardis-src:/usr/local/src/tardis:ro" - networks: - - name: "{{docker_network}}" - state: started - detach: True - cleanup: True - interactive: True - # command: /bin/bash - # python3 -m cobald.daemon /etc/cobald/config.yaml - command: python3 -m cobald.daemon /etc/cobald/config.yaml diff --git a/roles/cobald/files/cobald.Dockerfile b/roles/cobald/templates/cobald.Dockerfile similarity index 67% rename from roles/cobald/files/cobald.Dockerfile rename to roles/cobald/templates/cobald.Dockerfile index 1d1a7d5..b4256cd 100644 --- a/roles/cobald/files/cobald.Dockerfile +++ b/roles/cobald/templates/cobald.Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/library/centos:7 +FROM {{ 
cobald_docker_base_image | default("docker.io/library/centos:7") }} RUN yum update -y && \ yum install -y python3 git && pip3 install --upgrade pip && \ @@ -41,11 +41,20 @@ VOLUME /usr/local/src/cobaldmodules VOLUME /etc/cobald -COPY cobald-entrypoint.sh /usr/local/sbin/docker-entrypoint.sh +RUN mkdir -p /usr/local/lib/entrypoints.d/ -RUN chmod 755 /usr/local/sbin/docker-entrypoint.sh +COPY init-cobaldmodules.sh /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh -ENTRYPOINT [ "/usr/local/sbin/docker-entrypoint.sh" ] +RUN chmod 755 /usr/local/lib/entrypoints.d/50-init-cobaldmodules.sh + +RUN mkdir -p /etc/docker-init.d && printf '#!/bin/sh\npython3 -m cobald.daemon /etc/cobald/config.yaml\n' > /etc/docker-init.d/70-cobald && chmod 755 /etc/docker-init.d/70-cobald + +{% if cobald_docker_default_command | default(True) -%} +COPY cobald-entrypoint.sh /usr/local/sbin/cobald-entrypoint.sh + +RUN chmod 755 /usr/local/sbin/cobald-entrypoint.sh + +ENTRYPOINT [ "/usr/local/sbin/cobald-entrypoint.sh" ] RUN yum -y install iproute &&\ yum clean all && rm -rf /var/cache/yum @@ -54,4 +63,6 @@ USER cobald STOPSIGNAL SIGINT -CMD "python3 -m cobald.daemon /etc/cobald/config.yaml" +# CMD "python3 -m cobald.daemon /etc/cobald/config.yaml" +CMD /etc/docker-init.d/70-cobald +{%- endif %} diff --git a/roles/cobald/vars/cobald-slurm.yml b/roles/cobald/vars/cobald-slurm.yml new file mode 100644 index 0000000..ab38387 --- /dev/null +++ b/roles/cobald/vars/cobald-slurm.yml @@ -0,0 +1,12 @@ +cobald_image_tag: slurm +cobald_docker_base_image: "{{slurm.base_image}}" +cobald_docker_default_command: False +cobald_docker_network: "{{slurm.network}}" +cobald_domainname: "{{slurm.domain}}" +cobald_slurm_mounts: "{{slurm.mounts}}" +#- /container/volumes/slurm/:/etc/slurm/:rw +##- "{{slurm_cfg_path | mandatory}}:/etc/slurm/:rw" +#- /container/volumes/munge/munge.key:/etc/munge/munge.key:rw +## - "{{slurm_munge_path | mandatory}}:/etc/munge/munge.key:rw" +#- slurm-shared:/shared/:rw +## - "{{slurm_shared_path | 
mandatory}}:{{slurm_shared_target | default('/shared')}}:rw" diff --git a/roles/cobald/vars/main.yml b/roles/cobald/vars/main.yml new file mode 100644 index 0000000..cf379ae --- /dev/null +++ b/roles/cobald/vars/main.yml @@ -0,0 +1 @@ +cobald_docker_network: "{{docker_network}}" diff --git a/roles/cobald_facts/tasks/main.yml b/roles/cobald_facts/tasks/main.yml new file mode 100644 index 0000000..0f07d95 --- /dev/null +++ b/roles/cobald_facts/tasks/main.yml @@ -0,0 +1,11 @@ +- block: + - docker_container_info: + name: "{{ container_name | mandatory }}" + register: cobald_container_info + + - set_fact: + cobald_container_hostname: |- + {{cobald_container_info.container.Config.Hostname | default('cobald-'+ + lookup('password', '/dev/null chars=ascii_lowercase length=6')) }} + when: cobald_container_hostname is not defined + diff --git a/roles/slurm/defaults/main.yml b/roles/slurm/defaults/main.yml index 56e8641..9e15182 100644 --- a/roles/slurm/defaults/main.yml +++ b/roles/slurm/defaults/main.yml @@ -3,3 +3,6 @@ slurm_user: slurm slurm_log_path_ctld: /var/log/slurm/slurmctld.log slurm_log_path_d: /var/log/slurm/slurmd.log slurm_log_path_sched: /var/log/slurm/slurmsched.log +slurm_prefix: slurm +slurm_domain: slurm.local +docker_network: slurm diff --git a/roles/slurm/handlers/main.yml b/roles/slurm/handlers/main.yml index b82ee7c..fd04fdc 100644 --- a/roles/slurm/handlers/main.yml +++ b/roles/slurm/handlers/main.yml @@ -1,3 +1,3 @@ - name: reconfigure slurm - command: - cmd: docker container exec -it slurm-ctl scontrol reconfigure + shell: + cmd: "docker container exec {{slurm_prefix}}-ctl scontrol reconfigure || ( docker container restart {{slurm_prefix}}-ctl && docker container exec {{slurm_prefix}}-ctl scontrol reconfigure )" diff --git a/roles/slurm/tasks/main.yml b/roles/slurm/tasks/main.yml index f39f72a..1beeff6 100644 --- a/roles/slurm/tasks/main.yml +++ b/roles/slurm/tasks/main.yml @@ -5,7 +5,7 @@ - include_role: name: slurm_dockerimage - loop: + loop: # 
FIXME: default(omit)! - name: slurmctld dockerfile: "{{ lookup('file', 'slurmctld.Dockerfile') }}" files: @@ -59,12 +59,14 @@ loop: - slurm.conf - cgroup.conf + vars: + alloc_nodes: "{{ [ slurm_prefix+'-submit1' ] + extra_nodes | default([])}}" notify: reconfigure slurm tags: [ slurm-config ] - name: "create docker network to make service discovery work" docker_network: - name: slurm + name: "{{ docker_network }}" state: present - name: "create docker volume for shared access between nodes" @@ -73,7 +75,7 @@ state: present - set_fact: - slurm_nodes: # default nodes: controller and submit machine + slurm_nodes_std: # default nodes: controller and submit machine - machine: ctl image: slurm:slurmctld exposed_ports: [ "6817:6817/tcp" ] @@ -81,10 +83,18 @@ image: slurm:slurmd extra_mounts: - "/home/{{unpriv_user}}/job3/:/mnt/:rw" -# - machine: slurm-cobald -# image: slurm-cobald -# extra_mounts: -# # TODO + slurm_nodes_exec: | # build the execute nodes list from scratch on every run + {% set slurm_nodes_exec = [] %} + {% for i in range(1, num_nodes|int + 1) -%} + {% set _ = slurm_nodes_exec.extend([ + {'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%} + {%- endfor %} + {{ slurm_nodes_exec }} + slurm_default_mounts: + - /container/volumes/slurm/:/etc/slurm/:rw + - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw + - slurm-shared:/shared/:rw + slurm_network: "{{docker_network}}" tags: [ slurm-config ] # TODO: reserve some address using docker_network_info and assign as aux @@ -93,16 +103,16 @@ - name: run slurm docker containers docker_container: - name: "slurm-{{item.machine}}" - hostname: "slurm-{{item.machine}}" - domainname: "slurm.local" - volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}" + name: "{{ slurm_prefix }}-{{ item.machine }}" + hostname: "{{ slurm_prefix }}-{{ item.machine }}" + domainname: "{{ slurm_domain }}" + volumes: "{{ slurm_default_mounts + ( item.extra_mounts | default([]) ) }}" ports: "{{ 
item.exposed_ports | default([]) }}" networks: - - name: "slurm" + - name: "{{ slurm_network }}" env: - slurmuser: "{{slurm_user}}" - image: "{{item.image}}" + slurmuser: "{{ slurm_user }}" + image: "{{ item.image }}" state: started detach: True cleanup: True @@ -110,15 +120,17 @@ networks_cli_compatible: True interactive: True vars: - default_mounts: - - /container/volumes/slurm/:/etc/slurm/:rw - - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw - - slurm-shared:/shared/:rw - slurm_nodes_all: | # add execute nodes - {% for i in range(1, num_nodes+1) -%} - {% set _ = slurm_nodes.extend([ - {'machine':'exec%s'|format(i), 'image': 'slurm:slurmd'}]) -%} - {%- endfor %} - {{ slurm_nodes }} - loop: "{{slurm_nodes_all}}" + slurm_nodes_all: "{{ slurm_nodes_exec + slurm_nodes_std }}" + loop: "{{ slurm_nodes_all }}" + loop_control: + label: "{{slurm_prefix}}-{{ item.machine }}" tags: [ slurm-config ] + +- name: set facts to be used by other modules + set_fact: + slurm: + user: "{{slurm_user}}" + domain: "{{slurm_domain}}" + base_image: "slurm:base" + mounts: "{{slurm_default_mounts}}" + network: "{{docker_network}}" diff --git a/roles/slurm/templates/slurm.conf.j2 b/roles/slurm/templates/slurm.conf.j2 index 3b11bc9..ccda145 100644 --- a/roles/slurm/templates/slurm.conf.j2 +++ b/roles/slurm/templates/slurm.conf.j2 @@ -164,5 +164,7 @@ SlurmSchedLogFile={{slurm_log_path_sched}} # # COMPUTE NODES -NodeName=slurm-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN -NodeName=slurm-submit1 CPUs=1 State=UNKNOWN -PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] AllocNodes=slurm-submit1 Default=YES MaxTime=INFINITE State=UP +NodeName={{slurm_prefix}}-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN +{% for i in alloc_nodes -%} +NodeName={{i}} State=UNKNOWN +{% endfor %} +PartitionName=debug Nodes={{slurm_prefix}}-exec[1-{{num_nodes}}] AllocNodes={{alloc_nodes | join(',')}} Default=YES MaxTime=INFINITE State=UP diff --git a/roles/slurm/files/entrypoint.sh b/roles/slurm_dockerimage/files/entry-munge.sh similarity index 92% rename from 
roles/slurm/files/entrypoint.sh rename to roles/slurm_dockerimage/files/entry-munge.sh index 483b5e0..b4fb51a 100644 --- a/roles/slurm/files/entrypoint.sh +++ b/roles/slurm_dockerimage/files/entry-munge.sh @@ -5,5 +5,3 @@ if [ -f "/etc/munge/munge.key" ] ; then chown munge:munge /etc/munge/munge.key chmod 600 /etc/munge/munge.key fi - -exec "$@" diff --git a/roles/slurm_dockerimage/files/entrypoint.sh b/roles/slurm_dockerimage/files/entrypoint.sh new file mode 100644 index 0000000..5c3b431 --- /dev/null +++ b/roles/slurm_dockerimage/files/entrypoint.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -e + +for i in /usr/local/lib/entrypoints.d/* ; do + [ -f "$i" ] && /bin/sh "$i" || break +done + +exec "${@:-/bin/bash}" diff --git a/roles/slurm_dockerimage/files/slurm-base.Dockerfile b/roles/slurm_dockerimage/files/slurm-base.Dockerfile index c6f86df..4873efa 100644 --- a/roles/slurm_dockerimage/files/slurm-base.Dockerfile +++ b/roles/slurm_dockerimage/files/slurm-base.Dockerfile @@ -7,10 +7,12 @@ RUN yum install -y epel-release && \ RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \ yum clean all && rm -rf /var/cache/yum -# FIXME -COPY entrypoint.sh /usr/local/sbin/entrypoint.sh +RUN mkdir -p /usr/local/lib/entrypoints.d/ -RUN chown root:root /usr/local/sbin/entrypoint.sh && \ +COPY --chown=root:root entry-munge.sh /usr/local/lib/entrypoints.d/10-munge.sh +COPY --chown=root:root entrypoint.sh /usr/local/sbin/entrypoint.sh + +RUN chmod 755 /usr/local/lib/entrypoints.d/10-munge.sh && \ chmod 755 /usr/local/sbin/entrypoint.sh ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ] diff --git a/roles/slurm_dockerimage/tasks/dockerimage.yml b/roles/slurm_dockerimage/tasks/dockerimage.yml index 311ed06..29f977b 100644 --- a/roles/slurm_dockerimage/tasks/dockerimage.yml +++ b/roles/slurm_dockerimage/tasks/dockerimage.yml @@ -23,6 +23,8 @@ group: root mode: u=rwx,g=rx,o=rx loop: "{{ image.files | default([]) }}" + loop_control: + label: "{{ item.dest }}" register: 
slurm_cp_files - docker_image: diff --git a/roles/slurm_dockerimage/tasks/main.yml b/roles/slurm_dockerimage/tasks/main.yml index afc1f18..912f0eb 100644 --- a/roles/slurm_dockerimage/tasks/main.yml +++ b/roles/slurm_dockerimage/tasks/main.yml @@ -16,6 +16,7 @@ - file: slurm-base.Dockerfile perms: u=rw,g=r,o=r - file: entrypoint.sh + - file: entry-munge.sh - file: docker-init - file: start-scripts/10-munge when: not (slurm_baseimg_build_chg | default(False))