Compare commits

1 commit

Author | SHA1 | Date
---|---|---
 | 8c1cd6e902 |

inv.yml (13 lines changed)

@@ -1,15 +1,12 @@
 all:
   hosts:
-    ed-c7-1:
+    centos7VM:
       ansible_user: root
-      ansible_host: ed-c7-1.virt.magni.thoto.net
+      ansible_host: localhost
+      ansible_port: 19322
       ssh_args: -o ControlMaster=auto -o ControlPersist=60s
-      # ansible_host: 192.168.122.139
-      unpriv_user: thoto
+      unpriv_user: centos7 # gains privileges to use docker
   children:
-    htcondor:
-      hosts:
-        ed-c7-1:
     slurm:
       hosts:
-        ed-c7-1:
+        centos7VM

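A quick sanity check of the reworked inventory (a sketch, not part of the commit; it assumes Ansible is installed on the control machine and the repository root is the working directory):

    # dump the parsed inventory to confirm centos7VM replaced ed-c7-1
    ansible-inventory -i inv.yml --list
    # reach the renamed host (localhost via forwarded port 19322)
    ansible -i inv.yml centos7VM -m ping
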
play.yml (43 lines changed)

@@ -1,46 +1,15 @@
 ---
 - hosts: all
   tasks:
+    - name: "install epel-release"
+      yum:
+        name: [ epel-release ] # necessary to find htop => separate step
+        state: present
     - name: "install tools"
       yum:
-        name: [ vim-enhanced, htop, screen, bind-utils, nmap-ncat, net-tools ]
+        name: [ vim-enhanced, htop, screen, tmux, bind-utils, nmap-ncat, net-tools ]
         state: present
 
-- hosts: htcondor
-  pre_tasks:
-    - name: "install htcondor repo"
-      yum:
-        name: https://research.cs.wisc.edu/htcondor/repo/8.9/htcondor-release-current.el7.noarch.rpm
-        state: present
-      tags: htcondor
-
-    - name: "install htcondor software "
-      yum:
-        name: htcondor-ce
-        state: present
-      tags: htcondor
-
-    - name: "remove minicondor configuration"
-      yum:
-        name: minicondor
-        state: absent
-      tags: htcondor
-
-    - name: "setup singularity"
-      import_tasks: "singularity.yml"
-      tags: singularity
-
-  roles:
-    - name: "setup docker"
-      role: docker
-      tags: docker
-
-    - name: "setup htcondor test environment in docker containers"
-      role: docker-htcondor
-      tags:
-        - htcondor-containered
-        - htcondor
-
 - hosts: slurm
   vars:
     container_privileged: True
@@ -51,6 +20,4 @@
       tags: docker
     - name: "setup slurm test environment in docker containers"
       role: slurm
-      vars:
-        slurm_user: slurm # or root
       tags: slurm

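With the htcondor play dropped, the remaining plays can be run whole or narrowed by the tags kept in play.yml (a hedged example; the flags are standard ansible-playbook options, the tag names are those visible in this diff):

    # run everything that is left: the common tools play and the slurm play
    ansible-playbook -i inv.yml play.yml
    # or only the docker setup, or only the slurm test environment
    ansible-playbook -i inv.yml play.yml --tags docker
    ansible-playbook -i inv.yml play.yml --tags slurm
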
@@ -1,3 +0,0 @@
-use ROLE:centralmanager
-# ALLOW_WRITE per default on * -> following has no effect yet
-ALLOW_WRITE_COLLECTOR=$(ALLOW_WRITE) condor-exec.htc.local condor-sub.htc.local

@@ -1,92 +0,0 @@
-######################################################################
-##
-##  condor_config
-##
-##  This is the global configuration file for condor. This is where
-##  you define where the local config file is. Any settings
-##  made here may potentially be overridden in the local configuration
-##  file.  KEEP THAT IN MIND!  To double-check that a variable is
-##  getting set from the configuration file that you expect, use
-##  condor_config_val -v <variable name>
-##
-##  condor_config.annotated is a more detailed sample config file
-##
-##  Unless otherwise specified, settings that are commented out show
-##  the defaults that are used if you don't define a value.  Settings
-##  that are defined here MUST BE DEFINED since they have no default
-##  value.
-##
-######################################################################
-
-##  Where have you installed the bin, sbin and lib condor directories?
-RELEASE_DIR = /usr
-
-##  Where is the local condor directory for each host?  This is where the local config file(s), logs and
-##  spool/execute directories are located. this is the default for Linux and Unix systems.
-LOCAL_DIR = /var
-
-##  Where is the machine-specific local config file for each host?
-# LOCAL_CONFIG_FILE = /etc/condor/condor_config.local
-LOCAL_CONFIG_FILE = /etc/condor/condor_config_$(HOSTNAME).local
-##  If your configuration is on a shared file system, then this might be a better default
-#LOCAL_CONFIG_FILE = $(RELEASE_DIR)/etc/$(HOSTNAME).local
-##  If the local config file is not present, is it an error? (WARNING: This is a potential security issue.)
-REQUIRE_LOCAL_CONFIG_FILE = false
-
-##  The normal way to do configuration with RPMs is to read all of the
-##  files in a given directory that don't match a regex as configuration files.
-##  Config files are read in lexicographic order.
-LOCAL_CONFIG_DIR = /etc/condor/config.d
-#LOCAL_CONFIG_DIR_EXCLUDE_REGEXP = ^((\..*)|(.*~)|(#.*)|(.*\.rpmsave)|(.*\.rpmnew))$
-
-##
-## Do NOT use host-based security by default.
-##
-## This was the default for the 8.8 series (and earlier), but it is
-## intrinsically insecure. To make the 9.0 series secure by default, we
-## commented it out.
-##
-## You should seriously consider improving your security configuration.
-##
-## To continue to use your old security configuration, knowing that it is
-## insecure, add the line 'use SECURITY : HOST_BASED' to your local
-## configuration directory. Don't just uncomment the final line in this
-## comment block; changes in this file may be lost during your next upgrade.
-## The following shell command will make the change on most Linux systems.
-##
-## echo 'use SECURITY : HOST_BASED' >> $(condor_config_val LOCAL_CONFIG_DIR)/00-insecure.config
-##
-
-##  To expand your condor pool beyond a single host, set ALLOW_WRITE to match all of the hosts
-#ALLOW_WRITE = *.cs.wisc.edu
-##  FLOCK_FROM defines the machines that grant access to your pool via flocking. (i.e. these machines can join your pool).
-#FLOCK_FROM =
-##  FLOCK_TO defines the central managers that your schedd will advertise itself to (i.e. these pools will give matches to your schedd).
-#FLOCK_TO = condor.cs.wisc.edu, cm.example.edu
-
-##--------------------------------------------------------------------
-##  Values set by the rpm patch script:
-##--------------------------------------------------------------------
-
-##  For Unix machines, the path and file name of the file containing
-##  the pool password for password authentication.
-#SEC_PASSWORD_FILE = $(LOCAL_DIR)/lib/condor/pool_password
-
-##  Pathnames
-RUN     = $(LOCAL_DIR)/run/condor
-LOG     = $(LOCAL_DIR)/log/condor
-LOCK    = $(LOCAL_DIR)/lock/condor
-SPOOL   = $(LOCAL_DIR)/lib/condor/spool
-EXECUTE = $(LOCAL_DIR)/lib/condor/execute
-BIN     = $(RELEASE_DIR)/bin
-LIB     = $(RELEASE_DIR)/lib64/condor
-INCLUDE = $(RELEASE_DIR)/include/condor
-SBIN    = $(RELEASE_DIR)/sbin
-LIBEXEC = $(RELEASE_DIR)/libexec/condor
-SHARE   = $(RELEASE_DIR)/share/condor
-
-PROCD_ADDRESS = $(RUN)/procd_pipe
-
-JAVA_CLASSPATH_DEFAULT = $(SHARE) .
-
-## Install the minicondor package to run HTCondor on a single node

@@ -1,64 +0,0 @@
-
-HostKey _INSERT_HOST_KEY_
-AuthorizedKeysFile _INSERT_AUTHORIZED_KEYS_FILE_
-
-# The following option is not supported by all recent versions of OpenSSH,
-# so instead we rely on injection of the shell setup command in the authorized
-# keys file.
-#ForceCommand _INSERT_FORCE_COMMAND_
-
-# as a convenience to users, allow remote setting of environment
-# since sshd is running as the job uid, there isn't really a security concern
-AcceptEnv *
-
-Subsystem sftp /usr/libexec/openssh/sftp-server
-
-# prevent sshd from freaking out about reading files from inside
-# a tmp-like directory
-StrictModes no
-
-# Rejection by tcp wrappers is not logged at INFO or VERBOSE log levels,
-# so to make diagnosis of problems easier, we use DEBUG.
-LogLevel DEBUG
-
-X11Forwarding yes
-
-# By default, OpenSSH uses the ipv6 loopback even if ipv6 is disabled in the
-# kernel. This forces OpenSSH to use the "local" network instead
-X11UseLocalhost no
-
-# The following settings are recommended for good security.
-# In particular, only FIPS 140-2 algorithms are used.
-# URLs for extra information re FIPS security compliance:
-# https://rhel7stig.readthedocs.io/en/latest/
-# https://www.stigviewer.com/stig/red_hat_enterprise_linux_7/
-# https://people.redhat.com/swells/scap-security-guide/tables/table-rhel7-stig-manual.html
-
-#-FIPS High Severity
-Protocol 2
-PermitEmptyPasswords no
-
-#-FIPS Medium Severity
-# Note: Ciphers and MACs below will be incompatible with RHEL5 or earlier.
-Ciphers aes128-ctr,aes192-ctr,aes256-ctr
-MACs hmac-sha2-256,hmac-sha2-512
-PermitRootLogin no
-PermitUserEnvironment no
-GSSAPIAuthentication no
-KerberosAuthentication no
-HostbasedAuthentication no
-IgnoreRhosts yes
-IgnoreUserKnownHosts yes
-PrintLastLog yes
-UsePrivilegeSeparation sandbox
-Compression delayed
-
-#-Recommended for security, but left out ssh_to_job config
-# because they provide minimal value and are likely to annoy
-# users or generate needless warnings in the ssh_to_job setting.
-#
-# ClientAliveInterval 600  # Note: condor_submit -i sets TMOUT
-# ClientAliveCountMax 0
-# banner=/etc/issue  # Set to your warning banner
-# StrictModes yes  # Can't set due to tmp-like directory
-# RhostsRSAAuthentication no  # Obsolete Protocol version 1 option

@@ -1,26 +0,0 @@
-##
-## Default security settings
-##
-## Host-based security was the default for the 8.8 series (and earlier).
-##
-## Host-based security assumes that all users on a machine are trusted.
-## For example, if host-based security trusts that a given machine can
-## run jobs, then any user who can start a process on that machine can
-## start a startd that can "steal" jobs from the system.
-#
-## To help make HTCondor secure by default, we removed host-based security
-## from the default configuration file
-## (/etc/condor/condor_config).
-##
-## New installations of HTCondor should be made using the get_htcondor tool,
-## which can automatically establish IDTOKENS-based security across a multi-
-## node pool. For existing installations, we recommend you
-## consider improving your security configuration.
-##
-## To continue to use your old security configuration,
-## comment out the 'recommended' line below, and uncomment the
-## 'host_based' line.
-##
-
-# use security : host_based
-use security : recommended_v9_0

@@ -1 +0,0 @@
-CONDOR_HOST = condor-cm.htc.local

(File diff suppressed because it is too large.)

@@ -1 +0,0 @@
-use ROLE:execute

@@ -1 +0,0 @@
-use ROLE:submit

@@ -1,11 +0,0 @@
-FROM docker.io/library/centos:7
-
-RUN yum install -y https://research.cs.wisc.edu/htcondor/repo/8.9/htcondor-release-current.el7.noarch.rpm && \
-    yum install --nogpgcheck -y condor && \
-    yum install -y less && \
-    yum clean all
-
-RUN yum install -y iproute bind-utils nmap-ncat net-tools && \
-    yum clean all
-
-CMD bash -c 'cat <({ condor_master -f & tail --retry --pid $! -f /var/log/condor/MasterLog & })'

@@ -1,142 +0,0 @@
-- name: "htcondor docker image"
-  file:
-    path: "/container/docker-images/htcondor"
-    state: directory
-    owner: "{{unpriv_user}}"
-    group: docker
-    mode: "u=rwx,g=rwx,o=rx"
-
-- copy:
-    dest: "/container/docker-images/htcondor/Dockerfile"
-    src: "htcondor.Dockerfile"
-    owner: "{{unpriv_user}}"
-    group: docker
-  register: cp_dockerfile
-
-- docker_image:
-    name: "htcondor"
-    # pull: False
-    build:
-      pull: False
-      path: "/container/docker-images/htcondor"
-    source: build
-    force_source: "{{cp_dockerfile.changed}}"
-
-- name: "copy htcondor container configuration"
-  copy:
-    src: "conf/{{item}}/"
-    dest: "/container/volumes/{{item}}/"
-    owner: "{{unpriv_user}}"
-    group: docker
-    mode: "u=rwx,g=rwx"
-  with_items: [ "cm", "exec", "sub", "common"]
-
-- name: "check if pool shared secret exists"
-  stat:
-    path: "/container/volumes/common/passwords.d/POOL"
-  register: pool_pw
-
-- block:
-    - name: "create temporary password store"
-      tempfile:
-        state: directory
-      register: pool_pw_tmp
-
-    - name: "generate pool password"
-      copy:
-        dest: "{{pool_pw_tmp.path}}/poolpw"
-        content: "{{lookup('password','/dev/null')}}"
-      no_log: True
-
-    - name: "install pool password"
-      docker_container:
-        name: "condor-common"
-        image: htcondor
-        state: started
-        volumes:
-          - "/container/volumes/common/:/etc/condor/:rw"
-          - "{{pool_pw_tmp.path}}:/tmp/poolpw:ro"
-        detach: False
-        cleanup: True
-        command: "condor_store_cred add -c -i /tmp/poolpw/poolpw"
-
-    - name: "remove tokens since pool password (cert) changed"
-      file:
-        path: "/container/volumes/{{item}}/tokens.d/condor@htc.local"
-        state: absent
-      with_items: [ "cm", "exec", "sub" ]
-
-  always:
-    - name: "remove temporary password store"
-      file:
-        path: "{{pool_pw_tmp.path}}"
-        state: absent
-      when: pool_pw_tmp is defined and pool_pw_tmp.path
-  when: not pool_pw.stat.exists
-
-- name: "sync common files to individual containers"
-  copy_2:
-    remote_src: True
-    force: True
-    directory_mode: preserve
-    mode: preserve
-    src: "/container/volumes/common/"
-    dest: "/container/volumes/{{item}}/"
-  with_items: [ "cm", "exec", "sub"]
-
-
-- name: "collect tokens to generate"
-  stat:
-    path: "/container/volumes/{{item}}/tokens.d/condor@htc.local"
-  with_items: [ "cm", "exec", "sub" ]
-  register: tokens_state
-
-- name: "generate tokens"
-  docker_container:
-    name: "condor-common"
-    image: htcondor
-    state: started
-    volumes:
-      - "/container/volumes/{{item}}/:/etc/condor/:rw"
-    detach: False
-    cleanup: True
-    command: "condor_token_create -identity condor@$(domainname) -token /etc/condor/tokens.d/condor@htc.local"
-  with_items: "{{tokens_state.results | rejectattr('stat.exists') | map(attribute='item') | list }}"
-
-
-- name: "create docker network to make service discovery work"
-  docker_network:
-    name: condor
-    state: present
-
-# TODO: reserve some address using docker_network_info and assign as aux
-# address to enable cm to get a static address in order to be reachable from
-# htcondor running on docker host to enable submitting jobs.
-
-- name: "run htcondor containers"
-  docker_container:
-    name: "condor-{{item}}"
-    hostname: "condor-{{item}}"
-    domainname: "htc.local"
-    image: htcondor
-    state: started
-    detach: True
-    cleanup: True
-    networks_cli_compatible: True
-    networks:
-      - name: "condor"
-        aliases: [ "condor-{{item}}.htc.local" ]
-    volumes:
-      - "/container/volumes/{{item}}/:/etc/condor/:rw"
-  with_items: [ "cm", "exec", "sub"]
-#    auto_remove: True
-#    mounts:
-#      src: /container/volumes/cm/
-#      dest: /etc/condor/
-
-
-#- add_host:
-#    hostname: foo
-#    ansible_connection: docker_api
-#    docker_host: ssh://ed-c7-1.virt.magni.thoto.net

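For reference, the credential bootstrap that these deleted tasks performed can be reproduced by hand (a sketch only: the htcondor image name, the /container/volumes layout, and the /tmp/poolpw host directory holding a password file are taken from or assumed by the removed tasks; the condor commands and flags are the ones the tasks themselves ran):

    # store the pool password inside a throwaway container
    docker run --rm \
        -v /container/volumes/common/:/etc/condor/:rw \
        -v /tmp/poolpw:/tmp/poolpw:ro \
        htcondor condor_store_cred add -c -i /tmp/poolpw/poolpw
    # mint an IDTOKEN into one of the cm/exec/sub config volumes
    # (htc.local written out instead of $(domainname))
    docker run --rm \
        -v /container/volumes/cm/:/etc/condor/:rw \
        htcondor condor_token_create -identity condor@htc.local \
        -token /etc/condor/tokens.d/condor@htc.local
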
@@ -8,41 +8,6 @@
     name: [ "docker-ce", "python-docker-py" ] # latter for ansible modules
     state: present
 
-- name: "partition container image disk"
-  parted:
-    device: /dev/vdb
-    number: 1
-    state: present
-    # fs_type: xfs
-
-- filesystem:
-    dev: /dev/vdb1
-    fstype: xfs
-    opts: "-L image-store"
-
-- mount:
-    path: "/container"
-    src: "/dev/vdb1"
-    fstype: xfs
-    opts: "noatime"
-    state: mounted
-
-- file:
-    path: "/container/docker"
-    state: directory
-    owner: root
-    group: root
-    mode: "u=rwx,g=x,o=x"
-
-- name: "link docker configuration to new container partition"
-  file:
-    path: "/var/lib/docker"
-    src: "/container/docker"
-    state: link
-    owner: root
-    group: root
-    mode: "u=rwx,g=x,o=x"
-
 - name: "enable docker service in systemd"
   service:
     name: docker

@@ -1,5 +1 @@
 container_privileged: False
-slurm_user: slurm
-slurm_log_path_ctld: /var/log/slurm/slurmctld.log
-slurm_log_path_d: /var/log/slurm/slurmd.log
-slurm_log_path_sched: /var/log/slurm/slurmsched.log

@@ -1,9 +1,8 @@
 #!/usr/bin/env bash
 set -e
 
-if [ -f "/etc/munge/munge.key" ] ; then
-  chown munge:munge /etc/munge/munge.key
-  chmod 600 /etc/munge/munge.key
-fi
+chown munge:munge /etc/munge/munge.key
+chmod 600 /etc/munge/munge.key
 
 exec "$@"

@@ -2,42 +2,25 @@ FROM docker.io/library/centos:7 as base
 
 RUN yum install -y epel-release && \
     yum install -y slurm && \
-    yum clean all && rm -rf /var/cache/yum
+    yum clean all
 
 RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
-    yum clean all && rm -rf /var/cache/yum
+    yum clean all
 
 COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
 
-RUN chown root:root /usr/local/sbin/entrypoint.sh && \
-    chmod 755 /usr/local/sbin/entrypoint.sh
+RUN chown root:root /usr/local/sbin/entrypoint.sh && chmod 755 /usr/local/sbin/entrypoint.sh
 
 ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
 
-ARG slurmuser=slurm
-ENV slurmuser=${slurmuser}
-
-RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
-    slurm-setuser -u $slurmuser -g $slurmuser -y
-
-ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
-ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
-ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
-
 FROM base as slurmd
 
 RUN yum install -y slurm-slurmd && \
-    yum clean all && rm -rf /var/cache/yum
+    yum clean all
 
-CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
-    slurmd -D 2>/dev/null 1>/dev/null & \
-    tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
-
 FROM base as slurmctld
 
 RUN yum install -y slurm-slurmctld && \
-    yum clean all && rm -rf /var/cache/yum
+    yum clean all
 
-CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
-    su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
-    tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
+# CMD bash -c 'cat <({ condor_master -f & tail --retry --pid $! -f /var/log/condor/MasterLog & })'

@@ -1,32 +1,20 @@
 FROM docker.io/library/centos:7 as base
 
 RUN yum install -y epel-release && \
-    yum install -y slurm && \
-    yum clean all && rm -rf /var/cache/yum
+    yum install -y slurm slurm-slurmctld && \
+    yum clean all
 
 RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
-    yum clean all && rm -rf /var/cache/yum
+    yum clean all
+
+RUN yum install -y slurm-slurmctld && \
+    yum clean all
 
 COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
 
-RUN chown root:root /usr/local/sbin/entrypoint.sh && \
-    chmod 755 /usr/local/sbin/entrypoint.sh
+RUN chown root:root /usr/local/sbin/entrypoint.sh && chmod 755 /usr/local/sbin/entrypoint.sh
 
 ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
 
-ARG slurmuser=slurm
-ENV slurmuser=${slurmuser}
-
-RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
-    slurm-setuser -u $slurmuser -g $slurmuser -y
-
-RUN yum install -y slurm-slurmctld && \
-    yum clean all && rm -rf /var/cache/yum
-
-ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
-ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
-ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
-
-CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
-    su -s /bin/sh -c "slurmctld -D" ${slurmuser} 2>/dev/null 1>/dev/null & \
-    tail --retry --pid $! -f ${SLURMCTLD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
+CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & slurmctld -D & })'
+# ... & tail --retry --pid $! -f /var/log/condor/MasterLog & })'

@@ -1,32 +1,17 @@
 FROM docker.io/library/centos:7
 
 RUN yum install -y epel-release && \
-    yum install -y slurm && \
-    yum clean all && rm -rf /var/cache/yum
+    yum install -y slurm slurm-slurmd && \
+    yum clean all
 
 RUN yum install -y less iproute bind-utils nmap-ncat net-tools && \
-    yum clean all && rm -rf /var/cache/yum
+    yum clean all
 
 COPY entrypoint.sh /usr/local/sbin/entrypoint.sh
 
-RUN chown root:root /usr/local/sbin/entrypoint.sh && \
-    chmod 755 /usr/local/sbin/entrypoint.sh
+RUN chown root:root /usr/local/sbin/entrypoint.sh && chmod 755 /usr/local/sbin/entrypoint.sh
 
 ENTRYPOINT [ "/usr/local/sbin/entrypoint.sh" ]
 
-ARG slurmuser=slurm
-ENV slurmuser=${slurmuser}
-
-RUN useradd -d /var/lib/slurm -m --no-log-init --system $slurmuser &&\
-    slurm-setuser -u $slurmuser -g $slurmuser -y
-
-RUN yum install -y slurm-slurmd && \
-    yum clean all && rm -rf /var/cache/yum
-
-ENV SLURMCTLD_LOG_PATH="/var/log/slurm/slurmctld.log"
-ENV SLURMD_LOG_PATH="/var/log/slurm/slurmd.log"
-ENV SLURM_SCHED_LOG_PATH="/var/log/slurm/slurmsched.log"
-
-CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & \
-    slurmd -D 2>/dev/null 1>/dev/null & \
-    tail --retry --pid $! -f ${SLURMD_LOG_PATH} ${SLURM_SCHED_LOG_PATH} & })'
+# CMD bash -c 'cat <({ condor_master -f & tail --retry --pid $! -f /var/log/condor/MasterLog & })'
+CMD bash -c 'cat <({ su -s /bin/sh -c "munged -F" munge & slurmd -D & })'

@@ -1,19 +1,19 @@
 - file:
-    path: "/container/docker-images/{{item}}"
+    path: "/home/centos7/docker-images/{{item}}"
     state: directory
     owner: "{{unpriv_user}}"
     group: docker
 
 - copy:
     src: "{{item}}.Dockerfile"
-    dest: "/container/docker-images/{{item}}/Dockerfile"
+    dest: "/home/centos7/docker-images/{{item}}/Dockerfile"
     owner: "{{unpriv_user}}"
     group: docker
   register: slurm_cp_dockerfile
 
 - copy:
     src: "entrypoint.sh"
-    dest: "/container/docker-images/{{item}}/entrypoint.sh"
+    dest: "/home/centos7/docker-images/{{item}}/entrypoint.sh"
     owner: root
     group: root
     mode: u=rwx,g=rx,o=rx

@@ -24,7 +24,7 @@
     # pull: False
     build:
       pull: False
-      path: "/container/docker-images/{{item}}"
+      path: "/home/centos7/docker-images/{{item}}"
       # target: "{{item}}" # unsupported on old docker-py versions as in el7
     source: build
     force_source: "{{slurm_cp_dockerfile.changed or slurm_cp_entrypt.changed}}"

@@ -21,7 +21,7 @@
     mode: u=rw,g=,o=
 
 - file:
-    path: /container/volumes/munge
+    path: /home/centos7/volumes/munge
     state: directory
     owner: munge
     group: munge

@@ -33,17 +33,17 @@
     force: true
     mode: preserve
     src: /etc/munge/munge.key
-    dest: /container/volumes/munge/munge.key
+    dest: /home/centos7/volumes/munge/munge.key
 
 - file:
-    path: /container/volumes/slurm/
+    path: /home/centos7/volumes/slurm/
     state: directory
 
 - name: upload slurm config
   template:
     force: true
     src: "{{item}}.j2"
-    dest: "/container/volumes/slurm/{{item}}"
+    dest: "/home/centos7/volumes/slurm/{{item}}"
   loop:
     - slurm.conf
     - cgroup.conf

@@ -82,8 +82,6 @@
     volumes: "{{default_mounts + ( item.extra_mounts | default([]) ) }}"
     networks:
       - name: "slurm"
-    env:
-      slurmuser: "{{slurm_user}}"
     image: "{{item.image}}"
     state: started
     detach: True

@@ -92,8 +90,8 @@
     networks_cli_compatible: True
   vars:
     default_mounts:
-      - /container/volumes/slurm/:/etc/slurm/:rw
-      - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw
+      - /home/centos7/volumes/slurm/:/etc/slurm/:rw
+      - /home/centos7/volumes/munge/munge.key:/etc/munge/munge.key:rw
       - slurm-shared:/shared/:rw
     slurm_nodes_all: | # add execute nodes
       {% for i in range(1, 4) -%}

@@ -32,7 +32,6 @@ CryptoType=crypto/munge
 #MaxStepCount=40000
 #MaxTasksPerNode=128
 MpiDefault=pmix
-# when running slurmd as user change to: MpiDefault=none
 #MpiParams=ports=#-#
 #PluginDir=
 #PlugStackConfig=

@@ -58,9 +57,8 @@ SlurmctldPort=6817
 SlurmdPidFile=/var/run/slurm/slurmd.pid
 SlurmdPort=6818
 SlurmdSpoolDir=/var/spool/slurm/d
-SlurmUser={{slurm_user}}
-SlurmdUser=root
-# SlurmdUser=slurm -> sbatch does not work
+SlurmUser=root
+#SlurmdUser=root
 #SrunEpilog=
 #SrunProlog=
 StateSaveLocation=/var/spool/slurm/ctld

@@ -132,7 +130,7 @@ AccountingStorageType=accounting_storage/none
 #AccountingStorageUser=
 AccountingStoreJobComment=YES
 ClusterName=cluster
-#DebugFlags=Steps,TraceJobs
+#DebugFlags=
 #JobCompHost=
 JobCompLoc=/tmp/jobcomp
 #JobCompPass=

@@ -143,10 +141,10 @@ JobCompType=jobcomp/filetxt
 JobAcctGatherFrequency=30
 JobAcctGatherType=jobacct_gather/none
 SlurmctldDebug=verbose
-SlurmctldLogFile={{slurm_log_path_ctld}}
+#SlurmctldLogFile=
 SlurmdDebug=verbose
-SlurmdLogFile={{slurm_log_path_d}}
-SlurmSchedLogFile={{slurm_log_path_sched}}
+#SlurmdLogFile=
+#SlurmSchedLogFile=
 #SlurmSchedLogLevel=
 #
 #

@@ -165,4 +163,4 @@ SlurmSchedLogFile={{slurm_log_path_sched}}
 # COMPUTE NODES
 NodeName=slurm-exec[1-{{num_nodes}}] CPUs=2 CoresPerSocket=2 State=UNKNOWN
 NodeName=slurm-submit1 CPUs=1 State=UNKNOWN
-PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] AllocNodes=slurm-submit1 Default=YES MaxTime=INFINITE State=UP
+PartitionName=debug Nodes=slurm-exec[1-{{num_nodes}}] Default=YES MaxTime=INFINITE State=UP

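Once the containers are up, the cluster can be smoke-tested from the submit side (a sketch; it assumes the submit container is named slurm-submit1, matching the node names in slurm.conf, which the diff does not confirm):

    # partition and node state as seen from the submit node
    docker exec slurm-submit1 sinfo
    # run a trivial one-node job through the scheduler
    docker exec slurm-submit1 srun -N1 hostname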