# Patch summary (reviewer notes; text before the first "diff --git" header is
# not part of any hunk).
#
# Purpose: split the monolithic play.yml into three playbooks and pull them
# back in via import_playbook.
#
#   base.yml      new; the former "hosts: all" play. The ssh-key task now uses
#                 "{{cfg_unpriv_user}}" / "{{cfg_ssh_key}}" instead of the
#                 hard-coded user "thoto" and "{{ssh_key}}".
#   cobald.yml    new; the former "hosts: slurm, cobald" play. The only visible
#                 change is that the slurm role task's tags gain "influxdb".
#   htcondor.yml  new; the former "hosts: htcondor" play. pre_tasks + roles are
#                 folded into a single tasks list, with the docker and
#                 docker-htcondor roles pulled in through import_role.
#   inv.yml       adds cfg_unpriv_user: thoto next to each existing
#                 unpriv_user: thoto entry (two hosts).
#   play.yml      reduced to three import_playbook entries.
#   roles/cobald/tasks/grafana.yml
#                 image qualified as docker.io/grafana/grafana:7.5.7, adds
#                 networks_cli_compatible: True, drops "register: das".
#   roles/cobald/tasks/influxdb.yml
#                 image pinned to docker.io/library/influxdb:2.0;
#                 networks_cli_compatible: True moved up next to "networks".
#   roles/cobald/tasks/main.yml
#                 include_vars of cobald-slurm.yml gets "tags: always".
#   roles/slurm/files/slurm.Dockerfile
#                 deleted (43 lines).
#
# NOTE(review): the removed "hosts: all" play loaded vars-auth.yml and
#   vars-influx.yml through vars_files; base.yml has no vars_files. Presumably
#   cfg_ssh_key (and the influx variables) are now defined elsewhere, e.g. in
#   group_vars -- confirm, nothing in this patch defines them.
# NOTE(review): play.yml imports htcondor.yml with
#   when: '"htcondor" in group_names', while that playbook already targets
#   "hosts: htcondor"; the condition looks redundant -- confirm intent.
# NOTE(review): the comment typos in the added cobald.yml text ("overriden",
#   "unconfigureably", "which is used slurms") are carried over verbatim from
#   the removed play.yml lines. They are left untouched here so the hunks stay
#   identical to the recorded change; fix them in a follow-up.
# NOTE(review): the hunks below appear with their line breaks collapsed into
#   spaces. As stored, this text likely cannot be fed to git apply / patch;
#   regenerate it from the original commit before use -- confirm.
diff --git a/base.yml b/base.yml new file mode 100644 index 0000000..293d672 --- /dev/null +++ b/base.yml @@ -0,0 +1,19 @@ +--- +- hosts: all + tasks: + - name: "install epel repo" # for htop etc. + yum: + name: epel-release + state: present + + - name: "install tools" + yum: + name: [ vim-enhanced, htop, screen, bind-utils, nmap-ncat, net-tools ] + state: present + + - name: "install ssh-key" + authorized_key: + user: "{{cfg_unpriv_user}}" + key: "{{cfg_ssh_key}}" + state: present + diff --git a/cobald.yml b/cobald.yml new file mode 100644 index 0000000..a42512a --- /dev/null +++ b/cobald.yml @@ -0,0 +1,60 @@ +--- +- hosts: slurm, cobald + vars: + container_privileged: True + slurm_num_nodes: 10 + tasks: + - name: "setup docker" + import_role: name=docker + tags: docker + + - name: "get facts from existing cobald instance (i.e. hostname)" + include_role: + name: cobald + tasks_from: facts + apply: + tags: slurm, cobald, slurm-config + tags: slurm, cobald, slurm-config + vars: + container_name: cobald + + - name: "setup slurm test environment in docker containers" + include_role: + name: slurm + apply: + tags: slurm + vars: + slurm_user: slurm # or root + slurm_user_accounts: + - name: cobald + dir: /var/lib/cobald + num_nodes: "{{slurm_num_nodes}}" + extra_nodes: + - name: cobald + hostname: "{{cobald_container_hostname}}" # from cobald/facts.yml above + # hostname is used as NodeHostname, which is used slurms "networking + # code" (https://bugs.schedmd.com/show_bug.cgi?id=8615). + # It works either way around, but one of NodeName or NodeHostname has + # to match the container name (-n flag, not --hostname) since when + # submitting tasks to the slurm controller, it matches access + # permissions against a reverse lookup of the submitting ip address. + # Docker always and unconfigureably resolves the container ip in any + # network to containername.netname, where containername is the + # containers runtime name (not hostname supplied!) 
and netname is + # the network name in host environment. We should run our own dns... + docker_network: slurm + when: '"slurm" in group_names' + tags: slurm, cobald, influxdb, slurm-config + # tags: cobald requires some slurm facts, so cobald tag is included here + + - name: "install cobald" + include_role: + name: cobald + apply: + tags: cobald + vars: + cobald_slurm: True + container_name: cobald + # docker_network: slurm # overriden by vars/slurm.yml + when: '"cobald" in group_names' + tags: cobald, influxdb diff --git a/htcondor.yml b/htcondor.yml new file mode 100644 index 0000000..809497c --- /dev/null +++ b/htcondor.yml @@ -0,0 +1,32 @@ +--- +- hosts: htcondor + tasks: + - name: "install htcondor repo" + yum: + name: https://research.cs.wisc.edu/htcondor/repo/8.9/htcondor-release-current.el7.noarch.rpm + state: present + tags: htcondor + + - name: "install htcondor software " + yum: + name: htcondor-ce + state: present + tags: htcondor + + - name: "remove minicondor configuration" + yum: + name: minicondor + state: absent + tags: htcondor + + - name: "setup singularity" + import_tasks: "singularity.yml" + tags: singularity + + - name: "setup docker" + import_role: name=docker + tags: docker + + - name: "setup htcondor test environment in docker containers" + import_role: name=docker-htcondor + tags: htcondor-containered, htcondor diff --git a/inv.yml b/inv.yml index 6297cf2..619d459 100644 --- a/inv.yml +++ b/inv.yml @@ -6,12 +6,14 @@ all: ssh_args: -o ControlMaster=auto -o ControlPersist=60s # ansible_host: 192.168.122.139 unpriv_user: thoto + cfg_unpriv_user: thoto ed-c7-2: ansible_user: root ansible_host: ed-c7-2.virt.uller.thoto.net # ansible_host: 192.168.123.60 # + jumphost ssh_args: -o ControlMaster=auto -o ControlPersist=60s unpriv_user: thoto + cfg_unpriv_user: thoto children: htcondor: hosts: diff --git a/play.yml b/play.yml index f5b0ab2..19cdd79 100644 --- a/play.yml +++ b/play.yml @@ -1,116 +1,10 @@ --- -- hosts: all - vars_files: - - 
vars-auth.yml - - vars-influx.yml - tasks: - - name: "install epel repo" # for htop etc. - yum: - name: epel-release - state: present +- name: base setup + import_playbook: base.yml - - name: "install tools" - yum: - name: [ vim-enhanced, htop, screen, bind-utils, nmap-ncat, net-tools ] - state: present +- name: setup htcondor + import_playbook: htcondor.yml + when: '"htcondor" in group_names' - - name: "install ssh-key" - authorized_key: - user: thoto - key: "{{ssh_key}}" - state: present - -- hosts: htcondor - pre_tasks: - - name: "install htcondor repo" - yum: - name: https://research.cs.wisc.edu/htcondor/repo/8.9/htcondor-release-current.el7.noarch.rpm - state: present - tags: htcondor - - - name: "install htcondor software " - yum: - name: htcondor-ce - state: present - tags: htcondor - - - name: "remove minicondor configuration" - yum: - name: minicondor - state: absent - tags: htcondor - - - name: "setup singularity" - import_tasks: "singularity.yml" - tags: singularity - - roles: - - name: "setup docker" - role: docker - tags: docker - - - name: "setup htcondor test environment in docker containers" - role: docker-htcondor - tags: - - htcondor-containered - - htcondor - -- hosts: slurm, cobald - vars: - container_privileged: True - slurm_num_nodes: 10 - tasks: - - name: "setup docker" - import_role: name=docker - tags: docker - - - name: "get facts from existing cobald instance (i.e. 
hostname)" - include_role: - name: cobald - tasks_from: facts - apply: - tags: slurm, cobald, slurm-config - tags: slurm, cobald, slurm-config - vars: - container_name: cobald - - - name: "setup slurm test environment in docker containers" - include_role: - name: slurm - apply: - tags: slurm - vars: - slurm_user: slurm # or root - slurm_user_accounts: - - name: cobald - dir: /var/lib/cobald - num_nodes: "{{slurm_num_nodes}}" - extra_nodes: - - name: cobald - hostname: "{{cobald_container_hostname}}" # from cobald/facts.yml above - # hostname is used as NodeHostname, which is used slurms "networking - # code" (https://bugs.schedmd.com/show_bug.cgi?id=8615). - # It works either way around, but one of NodeName or NodeHostname has - # to match the container name (-n flag, not --hostname) since when - # submitting tasks to the slurm controller, it matches access - # permissions against a reverse lookup of the submitting ip address. - # Docker always and unconfigureably resolves the container ip in any - # network to containername.netname, where containername is the - # containers runtime name (not hostname supplied!) and netname is - # the network name in host environment. We should run our own dns... 
- docker_network: slurm - when: '"slurm" in group_names' - tags: slurm, cobald, slurm-config - # tags: cobald requires some slurm facts, so cobald tag is included here - - - name: "install cobald" - include_role: - name: cobald - apply: - tags: cobald - vars: - cobald_slurm: True - container_name: cobald - # docker_network: slurm # overriden by vars/slurm.yml - when: '"cobald" in group_names' - tags: cobald, influxdb +- name: setup slurm and cobald + import_playbook: cobald.yml diff --git a/roles/cobald/tasks/grafana.yml b/roles/cobald/tasks/grafana.yml index 7c94fa3..5b06663 100644 --- a/roles/cobald/tasks/grafana.yml +++ b/roles/cobald/tasks/grafana.yml @@ -14,11 +14,12 @@ - name: run grafana docker_container: name: ed-grafana - image: grafana/grafana:7.5.7 + image: docker.io/grafana/grafana:7.5.7 hostname: ed-grafana domainname: cobald.local networks: - name: "{{cobald_docker_network}}" + networks_cli_compatible: True published_ports: - "3000:3000" state: started @@ -42,7 +43,6 @@ version: Flux additional_secure_json_data: token: "{{influx_grafana_token.token}}" - register: das - community.grafana.grafana_dashboard: grafana_url: http://localhost:3000 diff --git a/roles/cobald/tasks/influxdb.yml b/roles/cobald/tasks/influxdb.yml index d02318d..3c58bbe 100644 --- a/roles/cobald/tasks/influxdb.yml +++ b/roles/cobald/tasks/influxdb.yml @@ -1,11 +1,12 @@ - name: run influxdb in docker container docker_container: name: ed-influxdb - image: influxdb + image: docker.io/library/influxdb:2.0 hostname: "{{influx_hostname}}" domainname: "{{influx_domainname}}" networks: - name: "{{ cobald_docker_network }}" + networks_cli_compatible: True published_ports: - "{{influx_pubport}}:8086" volumes: @@ -21,7 +22,6 @@ state: started detach: True cleanup: True - networks_cli_compatible: True - name: add ansible connection to influxdb container add_host: diff --git a/roles/cobald/tasks/main.yml b/roles/cobald/tasks/main.yml index dc90e49..4ef9df5 100644 --- 
a/roles/cobald/tasks/main.yml +++ b/roles/cobald/tasks/main.yml @@ -1,5 +1,6 @@ - include_vars: cobald-slurm.yml when: cobald_slurm | default(False) + tags: always - name: build cobald:slurm docker image include_role: diff --git a/roles/slurm/files/slurm.Dockerfile b/roles/slurm/files/slurm.Dockerfile deleted file mode 100644 index 36799a9..0000000 --- a/roles/slurm/files/slurm.Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -FROM docker.io/library/centos:7 as base - -RUN yum install -y epel-release && \ - yum install -y slurm && \ - yum clean all && rm -rf /var/cache/yum - -RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \ - yum clean all && rm -rf /var/cache/yum - -COPY entrypoint.sh /usr/local/sbin/entrypoint.sh - -RUN chown root:root /usr/local/sbin/entrypoint.sh && \ - chmod 755 /usr/local/sbin/entrypoint.sh - -ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ] - -ARG slurmuser=slurm -ENV slurmuser=${slurmuser} - -RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\ - slurm-setuser -u $slurmuser -g $slurmuser -y - -ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log" -ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log" -ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log" - -FROM base as slurmd - -RUN yum install -y slurm-slurmd && \ - yum clean all && rm -rf /var/cache/yum - -CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \ - slurmd -D 2>/dev/null 1>/dev/null & \ - tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })' - -FROM base as slurmctld - -RUN yum install -y slurm-slurmctld && \ - yum clean all && rm -rf /var/cache/yum - -CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \ - su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \ - tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'