diff --git a/inv.yml b/inv.yml index b2dee1f..06d15ec 100644 --- a/inv.yml +++ b/inv.yml @@ -1,15 +1,12 @@ all: hosts: - ed-c7-1: + centos7VM: ansible_user: root - ansible_host: ed-c7-1.virt.magni.thoto.net + ansible_host: localhost + ansible_port: 19322 ssh_args: -o ControlMaster=auto -o ControlPersist=60s - # ansible_host: 192.168.122.139 - unpriv_user: thoto + unpriv_user: centos7 # gains privileges to use docker children: - htcondor: - hosts: - ed-c7-1: slurm: hosts: - ed-c7-1: + centos7VM: diff --git a/play.yml b/play.yml index d2110ba..d84e67c 100644 --- a/play.yml +++ b/play.yml @@ -1,46 +1,15 @@ --- - hosts: all tasks: + - name: "install epel-release" + yum: + name: [ epel-release ] # necessary to find htop => separate step + state: present - name: "install tools" yum: - name: [ vim-enhanced, htop, screen, bind-utils, nmap-ncat, net-tools ] + name: [ vim-enhanced, htop, screen, tmux, bind-utils, nmap-ncat, net-tools ] state: present -- hosts: htcondor - pre_tasks: - - name: "install htcondor repo" - yum: - name: https://research.cs.wisc.edu/htcondor/repo/8.9/htcondor-release-current.el7.noarch.rpm - state: present - tags: htcondor - - - name: "install htcondor software " - yum: - name: htcondor-ce - state: present - tags: htcondor - - - name: "remove minicondor configuration" - yum: - name: minicondor - state: absent - tags: htcondor - - - name: "setup singularity" - import_tasks: "singularity.yml" - tags: singularity - - roles: - - name: "setup docker" - role: docker - tags: docker - - - name: "setup htcondor test environment in docker containers" - role: docker-htcondor - tags: - - htcondor-containered - - htcondor - - hosts: slurm vars: container_privileged: True diff --git a/roles/docker-htcondor/files/conf/cm/config.d/50-condor-centralmanager b/roles/docker-htcondor/files/conf/cm/config.d/50-condor-centralmanager deleted file mode 100644 index 7219d54..0000000 --- a/roles/docker-htcondor/files/conf/cm/config.d/50-condor-centralmanager +++ /dev/null @@ 
-1,3 +0,0 @@ -use ROLE:centralmanager -# ALLOW_WRITE per default on * -> following has no effect yet -ALLOW_WRITE_COLLECTOR=$(ALLOW_WRITE) condor-exec.htc.local condor-sub.htc.local diff --git a/roles/docker-htcondor/files/conf/common/condor_config b/roles/docker-htcondor/files/conf/common/condor_config deleted file mode 100644 index 932de5c..0000000 --- a/roles/docker-htcondor/files/conf/common/condor_config +++ /dev/null @@ -1,92 +0,0 @@ -###################################################################### -## -## condor_config -## -## This is the global configuration file for condor. This is where -## you define where the local config file is. Any settings -## made here may potentially be overridden in the local configuration -## file. KEEP THAT IN MIND! To double-check that a variable is -## getting set from the configuration file that you expect, use -## condor_config_val -v -## -## condor_config.annotated is a more detailed sample config file -## -## Unless otherwise specified, settings that are commented out show -## the defaults that are used if you don't define a value. Settings -## that are defined here MUST BE DEFINED since they have no default -## value. -## -###################################################################### - -## Where have you installed the bin, sbin and lib condor directories? -RELEASE_DIR = /usr - -## Where is the local condor directory for each host? This is where the local config file(s), logs and -## spool/execute directories are located. this is the default for Linux and Unix systems. -LOCAL_DIR = /var - -## Where is the machine-specific local config file for each host? -# LOCAL_CONFIG_FILE = /etc/condor/condor_config.local -LOCAL_CONFIG_FILE = /etc/condor/condor_config_$(HOSTNAME).local -## If your configuration is on a shared file system, then this might be a better default -#LOCAL_CONFIG_FILE = $(RELEASE_DIR)/etc/$(HOSTNAME).local -## If the local config file is not present, is it an error? 
(WARNING: This is a potential security issue.) -REQUIRE_LOCAL_CONFIG_FILE = false - -## The normal way to do configuration with RPMs is to read all of the -## files in a given directory that don't match a regex as configuration files. -## Config files are read in lexicographic order. -LOCAL_CONFIG_DIR = /etc/condor/config.d -#LOCAL_CONFIG_DIR_EXCLUDE_REGEXP = ^((\..*)|(.*~)|(#.*)|(.*\.rpmsave)|(.*\.rpmnew))$ - -## -## Do NOT use host-based security by default. -## -## This was the default for the 8.8 series (and earlier), but it is -## intrinsically insecure. To make the 9.0 series secure by default, we -## commented it out. -## -## You should seriously consider improving your security configuration. -## -## To continue to use your old security configuration, knowing that it is -## insecure, add the line 'use SECURITY : HOST_BASED' to your local -## configuration directory. Don't just uncomment the final line in this -## comment block; changes in this file may be lost during your next upgrade. -## The following shell command will make the change on most Linux systems. -## -## echo 'use SECURITY : HOST_BASED' >> $(condor_config_val LOCAL_CONFIG_DIR)/00-insecure.config -## - -## To expand your condor pool beyond a single host, set ALLOW_WRITE to match all of the hosts -#ALLOW_WRITE = *.cs.wisc.edu -## FLOCK_FROM defines the machines that grant access to your pool via flocking. (i.e. these machines can join your pool). -#FLOCK_FROM = -## FLOCK_TO defines the central managers that your schedd will advertise itself to (i.e. these pools will give matches to your schedd). -#FLOCK_TO = condor.cs.wisc.edu, cm.example.edu - -##-------------------------------------------------------------------- -## Values set by the rpm patch script: -##-------------------------------------------------------------------- - -## For Unix machines, the path and file name of the file containing -## the pool password for password authentication. 
-#SEC_PASSWORD_FILE = $(LOCAL_DIR)/lib/condor/pool_password - -## Pathnames -RUN = $(LOCAL_DIR)/run/condor -LOG = $(LOCAL_DIR)/log/condor -LOCK = $(LOCAL_DIR)/lock/condor -SPOOL = $(LOCAL_DIR)/lib/condor/spool -EXECUTE = $(LOCAL_DIR)/lib/condor/execute -BIN = $(RELEASE_DIR)/bin -LIB = $(RELEASE_DIR)/lib64/condor -INCLUDE = $(RELEASE_DIR)/include/condor -SBIN = $(RELEASE_DIR)/sbin -LIBEXEC = $(RELEASE_DIR)/libexec/condor -SHARE = $(RELEASE_DIR)/share/condor - -PROCD_ADDRESS = $(RUN)/procd_pipe - -JAVA_CLASSPATH_DEFAULT = $(SHARE) . - -## Install the minicondor package to run HTCondor on a single node diff --git a/roles/docker-htcondor/files/conf/common/condor_ssh_to_job_sshd_config_template b/roles/docker-htcondor/files/conf/common/condor_ssh_to_job_sshd_config_template deleted file mode 100644 index fb70d86..0000000 --- a/roles/docker-htcondor/files/conf/common/condor_ssh_to_job_sshd_config_template +++ /dev/null @@ -1,64 +0,0 @@ - -HostKey _INSERT_HOST_KEY_ -AuthorizedKeysFile _INSERT_AUTHORIZED_KEYS_FILE_ - -# The following option is not supported by all recent versions of OpenSSH, -# so instead we rely on injection of the shell setup command in the authorized -# keys file. -#ForceCommand _INSERT_FORCE_COMMAND_ - -# as a convenience to users, allow remote setting of environment -# since sshd is running as the job uid, there isn't really a security concern -AcceptEnv * - -Subsystem sftp /usr/libexec/openssh/sftp-server - -# prevent sshd from freaking out about reading files from inside -# a tmp-like directory -StrictModes no - -# Rejection by tcp wrappers is not logged at INFO or VERBOSE log levels, -# so to make diagnosis of problems easier, we use DEBUG. -LogLevel DEBUG - -X11Forwarding yes - -# By default, OpenSSH uses the ipv6 loopback even if ipv6 is disabled in the -# kernel. This forces OpenSSH to use the "local" network instead -X11UseLocalhost no - -# The following settings are recommended for good security. 
-# In particular, only FIPS 140-2 algorithms are used. -# URLs for extra information re FIPS security compliance: -# https://rhel7stig.readthedocs.io/en/latest/ -# https://www.stigviewer.com/stig/red_hat_enterprise_linux_7/ -# https://people.redhat.com/swells/scap-security-guide/tables/table-rhel7-stig-manual.html - -#-FIPS High Severity -Protocol 2 -PermitEmptyPasswords no - -#-FIPS Medium Severity -# Note: Ciphers and MACs below will be incompatible with RHEL5 or earlier. -Ciphers aes128-ctr,aes192-ctr,aes256-ctr -MACs hmac-sha2-256,hmac-sha2-512 -PermitRootLogin no -PermitUserEnvironment no -GSSAPIAuthentication no -KerberosAuthentication no -HostbasedAuthentication no -IgnoreRhosts yes -IgnoreUserKnownHosts yes -PrintLastLog yes -UsePrivilegeSeparation sandbox -Compression delayed - -#-Recommended for security, but left out ssh_to_job config -# because they provide minimal value and are likely to annoy -# users or generate needless warnings in the ssh_to_job setting. -# -# ClientAliveInterval 600 # Note: condor_submit -i sets TMOUT -# ClientAliveCountMax 0 -# banner=/etc/issue # Set to your warning banner -# StrictModes yes # Can't set due to tmp-like directory -# RhostsRSAAuthentication no # Obsolete Protocol version 1 option diff --git a/roles/docker-htcondor/files/conf/common/config.d/00-htcondor-9.0.config b/roles/docker-htcondor/files/conf/common/config.d/00-htcondor-9.0.config deleted file mode 100644 index 1fe0314..0000000 --- a/roles/docker-htcondor/files/conf/common/config.d/00-htcondor-9.0.config +++ /dev/null @@ -1,26 +0,0 @@ -## -## Default security settings -## -## Host-based security was the default for the 8.8 series (and earlier). -## -## Host-based security assumes that all users on a machine are trusted. -## For example, if host-based security trusts that a given machine can -## run jobs, then any user who can start a process on that machine can -## start a startd that can "steal" jobs from the system. 
-# -## To help make HTCondor secure by default, we removed host-based security -## from the default configuration file -## (/etc/condor/condor_config). -## -## New installations of HTCondor should be made using the get_htcondor tool, -## which can automatically establish IDTOKENS-based security across a multi- -## node pool. For existing installations, we recommend you -## consider improving your security configuration. -## -## To continue to use your old security configuration, -## comment out the 'recommended' line below, and uncomment the -## 'host_based' line. -## - -# use security : host_based -use security : recommended_v9_0 diff --git a/roles/docker-htcondor/files/conf/common/config.d/20-common.config b/roles/docker-htcondor/files/conf/common/config.d/20-common.config deleted file mode 100644 index 5bce3ee..0000000 --- a/roles/docker-htcondor/files/conf/common/config.d/20-common.config +++ /dev/null @@ -1 +0,0 @@ -CONDOR_HOST = condor-cm.htc.local diff --git a/roles/docker-htcondor/files/conf/common/ganglia.d/00_default_metrics b/roles/docker-htcondor/files/conf/common/ganglia.d/00_default_metrics deleted file mode 100644 index 69a2811..0000000 --- a/roles/docker-htcondor/files/conf/common/ganglia.d/00_default_metrics +++ /dev/null @@ -1,1081 +0,0 @@ -[ - Name = strcat(MyType,"CondorVersion"); - Value = CondorVersion; - Desc = "Version String"; - TargetType = "Scheduler,Negotiator"; -] -[ - Name = strcat(MyType,"UpdatesLost"); - Value = UpdatesLost; - Verbosity = 2; - Desc = "Number of ClassAd updates that were sent by this daemon but not received by the collector"; - TargetType = "Scheduler,Negotiator,Machine_slot1"; -] -[ - Name = strcat(MyType,"UpdatesTotal"); - Value = UpdatesTotal; - Verbosity = 2; - Desc = "Number of ClassAd updates that were sent by this daemon"; - TargetType = "Scheduler,Negotiator,Machine_slot1"; -] -[ - Name = strcat(MyType,"RecentDaemonCoreDutyCycle"); - Value = RecentDaemonCoreDutyCycle; - Desc = "Recent fraction of busy time in 
the daemon event loop"; - Scale = 100; - Units = "%"; - TargetType = "Scheduler,Negotiator"; -] -/* Not a useful graph. Should be converted to a human readable string metric */ -[ - Name = strcat(MyType,"MonitorSelfAge"); - Value = MonitorSelfAge; - Verbosity = 99; - Desc = "Age of this daemon"; - Units = "seconds"; - TargetType = "Scheduler,Negotiator,Machine_slot1"; -] -[ - Name = strcat(MyType,"MonitorSelfCPUUsage"); - Value = MonitorSelfCPUUsage; - Verbosity = 2; - Desc = "The fraction of one CPU recently used by this daemon"; - TargetType = "Scheduler,Negotiator,Machine_slot1"; -] -[ - Name = strcat(MyType,"MonitorSelfImageSize"); - Value = MonitorSelfImageSize; - Verbosity = 1; - Desc = "Memory allocated to this daemon (i.e. virtual image size)"; - Units = "bytes"; - Scale = 1024; - Type = "float"; - TargetType = "Scheduler,Negotiator,Machine_slot1"; -] -[ - Name = strcat(MyType,"MonitorSelfRegisteredSocketCount"); - Value = MonitorSelfRegisteredSocketCount; - Verbosity = 2; - Desc = "Number of sockets registered in this daemon's event loop"; - Units = "sockets"; - TargetType = "Scheduler,Negotiator,Machine_slot1"; -] -[ - Name = strcat(MyType,"MonitorSelfResidentSetSize"); - Value = MonitorSelfResidentSetSize; - Verbosity = 2; - Desc = "RAM allocated to this daemon"; - Units = "bytes"; - Scale = 1024; - Type = "float"; - TargetType = "Scheduler,Negotiator,Machine_slot1"; -] -[ - Name = strcat(MyType,"MonitorSelfSecuritySessions"); - Value = MonitorSelfSecuritySessions; - Verbosity = 2; - Desc = "Number of security sessions in this daemon's cache"; - TargetType = "Scheduler,Negotiator,Machine_slot1"; -] - -[ - Name = "JobsAccumBadputTime"; - Desc = "Runtime of jobs that were aborted (removed or held) or (standard universe only) evicted without a checkpoint."; - Scale = 0.000277778; - Units = "hours"; - Type = "float"; - TargetType = "Scheduler"; -] -[ - Name = "JobsAccumExecuteTime"; - Desc = "Time spent running jobs. 
Does not include file transfer and other job handling time."; - Scale = 0.000277778; - Units = "hours"; - Type = "float"; - TargetType = "Scheduler"; -] -[ - Name = "JobsAccumPostExecuteTime"; - Verbosity = 2; - Desc = "Time spent processing a completed job (includes output file transfer)"; - Scale = 0.000277778; - Units = "hours"; - Type = "float"; - TargetType = "Scheduler"; -] -[ - Name = "JobsAccumPreExecuteTime"; - Verbosity = 2; - Desc = "Time spent preparing to run a job (includes input file transfer)"; - Scale = 0.000277778; - Units = "hours"; - Type = "float"; - TargetType = "Scheduler"; -] -[ - Name = "JobsAccumRunningTime"; - Desc = "Time spent running jobs that were not counted as badput (i.e. not removed or held). Includes file transfer and other handling time."; - Units = "hours"; - Scale = 0.000277778; - Type = "float"; - TargetType = "Scheduler"; -] -[ - Name = "JobsAccumTimeToStart"; - Verbosity = 2; - Desc = "Time between submit and running of a job"; - Scale = 0.000277778; - Units = "hours"; - Type = "float"; - TargetType = "Scheduler"; -] -[ - Name = "JobsCheckpointed"; - Verbosity = 2; - Desc = "Number of job run attempts that were interrupted and successfully checkpointed"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsCompleted"; - Desc = "Number of jobs that terminated normally (i.e. 
not via a signal or abort)"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsCoredumped"; - Verbosity = 1; - Desc = "Number of jobs that crashed and generated a core file"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsDebugLogError"; - Verbosity = 2; - Desc = "Count of shadows that exited due to debug log errors"; - Units = "shadows"; - TargetType = "Scheduler"; -] -[ - Name = "JobsExecFailed"; - Verbosity = 1; - Desc = "Count of job run attempts that failed to execute the specified command"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsExited"; - Verbosity = 2; - Desc = "Count of job run attempts that have completed (successfully or not)"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsExitedAndClaimClosing"; - Verbosity = 2; - Desc = "Count of job run attempts that have completed when claim was not accepting additional jobs"; - Units = "jobs"; - TargetType = "Scheduler"; -] -/* JobsExitedNormally is the same as JobsCompleted, so don't bother. */ -/* -[ - Name = "JobsExitedNormally"; - Desc = ""; - Units = ""; - TargetType = "Scheduler"; -] -*/ -[ - Name = "JobsExitException"; - Verbosity = 2; - Desc = "Count of job run attempts that ended with a job handling exception (shadow exception)"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsKilled"; - Verbosity = 1; - Desc = "Count of job run attempts in which the job was killed (i.e. 
evicted)"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsMissedDeferralTime"; - Verbosity = 2; - Desc = "Count of job run attempts that failed because the specified deferral time was missed"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsNotStarted"; - Verbosity = 2; - Desc = "Count of job run attempts that failed because the request to activate the claim failed"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsShadowNoMemory"; - Verbosity = 2; - Desc = "Count of job run attempts that failed because there was not enough memory (RESERVED_SWAP)"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsShouldHold"; - Verbosity = 2; - Desc = "Count of job run attempts that have resulted in the job going on hold"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsShouldRemove"; - Verbosity = 2; - Desc = "Count of job run attempts that have resulted in the job being removed (e.g. periodic_remove policy)"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsShouldRequeue"; - Verbosity = 2; - Desc = "Count of job run attempts that ended with the job being requeued due to handling failures or OnExitRemove=false"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsStarted"; - Verbosity = 1; - Desc = "Number of job run attempts started"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "JobsSubmitted"; - Desc = "Number of jobs submitted"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "MaxJobsRunning"; - Verbosity = 1; - Desc = "Configured limit on number of running jobs"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "NumUsers"; - Verbosity = 1; - Desc = "Number of different users who currently have jobs in the queue"; - Units = "users"; - TargetType = "Scheduler"; -] -[ - Name = "RecentStatsLifetime"; - Verbosity = 2; - Desc = "Seconds elapsed since the beginning of the current stats collection window"; - Units = 
"seconds"; - TargetType = "Scheduler"; -] -[ - Name = "ScheddSwapExhausted"; - Verbosity = 2; - Desc = "Non-zero when jobs cannot be started due to RESERVED_SWAP"; - TargetType = "Scheduler"; -] -[ - Name = "ShadowsRunning"; - Verbosity = 2; - Desc = "Number of shadow processes currently running"; - Units = "shadows"; - TargetType = "Scheduler"; -] -[ - Name = "ShadowsStarted"; - Verbosity = 2; - Desc = "Number of shadow processes started"; - Units = "shadows"; - TargetType = "Scheduler"; -] -[ - Name = "StatsLifetime"; - Verbosity = 2; - Desc = "Seconds of elapsed time since the beginning of the schedd lifetime stat collection window"; - Units = "seconds"; - TargetType = "Scheduler"; -] -[ - Name = "TotalFlockedJobs"; - Desc = "Number of jobs from this schedd that are flocked to other pools"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "TotalHeldJobs"; - Desc = "Number of jobs in this schedd that are on hold"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Aggregate = "SUM"; - Name = "Held Jobs in Pool"; - Value = TotalHeldJobs; - Desc = "Number of jobs on hold in schedds reporting to this pool"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "TotalIdleJobs"; - Desc = "Number of idle jobs in this schedd's queue"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Aggregate = "SUM"; - Name = "Idle Jobs in Pool"; - Value = TotalIdleJobs; - Desc = "Number of idle jobs in schedds reporting to this pool"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "TotalJobAds"; - Desc = "Number of jobs currently in this schedd's queue"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Aggregate = "SUM"; - Name = "Jobs in Pool"; - Value = TotalJobAds; - Desc = "Number of jobs currently in schedds reporting to this pool"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "TotalLocalJobsIdle"; - Verbosity = 2; - Desc = "Number of local universe jobs in this schedd's queue"; - Units = "jobs"; - TargetType = 
"Scheduler"; -] -[ - Name = "TotalLocalJobsRunning"; - Verbosity = 2; - Desc = "Number of running local universe jobs in this schedd's queue"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "TotalRemovedJobs"; - Verbosity = 1; - Desc = "Number of jobs that are in the process of being removed"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "TotalRunningJobs"; - Desc = "Number of running jobs in this schedd's queue"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Aggregate = "SUM"; - Name = "Running Jobs in Pool"; - Value = TotalRunningJobs; - Desc = "Number of running jobs in schedds reporting to this pool"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "TotalSchedulerJobsIdle"; - Verbosity = 2; - Desc = "Number of idle scheduler universe jobs in this schedd's queue"; - Units = "jobs"; - TargetType = "Scheduler"; -] -[ - Name = "TotalSchedulerJobsRunning"; - Verbosity = 2; - Desc = "Number of running scheduler universe jobs in this schedd's queue"; - Units = "jobs"; - TargetType = "Scheduler"; -] - -[ - Name = strcat(Name,"-TotalRunningJobs"); - Title = strcat(Name, " Total Running Jobs"); - Aggregate = "SUM"; - Value = RunningJobs; - Verbosity = 2; - Desc = strcat("Total number of running jobs from user ", Name); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Submitters"; -] -[ - Name = strcat(Name, "RunningJobs"); - Title = strcat(Name, " Running Jobs"); - Value = RunningJobs; - Verbosity = 2; - Desc = strcat("Number of running jobs from user ", Name); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Submitters"; -] -[ - Name = strcat(Name,"-TotalIdleJobs"); - Title = strcat(Name, " Total Idle Jobs"); - Aggregate = "SUM"; - Value = IdleJobs; - Verbosity = 2; - Desc = strcat("Total number of idle jobs from user ", Name); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Submitters"; -] -[ - Name = strcat(Name, "IdleJobs"); - Title = strcat(Name, " Idle Jobs"); - 
Value = IdleJobs; - Verbosity = 2; - Desc = strcat("Number of idle jobs from user ", Name); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Submitters"; -] -[ - Name = strcat(Name,"-TotalHeldJobs"); - Title = strcat(Name, " Total Held Jobs"); - Aggregate = "SUM"; - Value = HeldJobs; - Verbosity = 2; - Desc = strcat("Total number of held jobs from user ", Name); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Submitters"; -] -[ - Name = strcat(Name, "HeldJobs"); - Title = strcat(Name, " Held Jobs"); - Value = HeldJobs; - Verbosity = 2; - Desc = strcat("Number of held jobs from user ", Name); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Submitters"; -] -[ - Name = strcat(Name,"-TotalFlockedJobs"); - Title = strcat(Name, " Total Flocked Jobs"); - Aggregate = "SUM"; - Value = FlockedJobs; - Verbosity = 2; - Desc = strcat("Total number of flocked jobs from user ", Name); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Submitters"; -] -[ - Name = strcat(Name, "FlockedJobs"); - Title = strcat(Name, " Flocked Jobs"); - Value = FlockedJobs; - Verbosity = 2; - Desc = strcat("Number of flocked jobs from user ", Name); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Submitters"; -] - -[ - Name = strcat(ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),"nogroup"),"-TotalRunningJobs"); - Title = strcat(ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),"nogroup"), " Total Running Jobs"); - Aggregate = "SUM"; - Value = RunningJobs; - Verbosity = 2; - Desc = strcat("Total number of running jobs from ", ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "group \\1"),"no group")); - Units = "jobs"; - 
TargetType = "Submitter"; - Group = "HTCondor Accounting Groups"; -] -[ - Name = strcat(ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),"nogroup"),"-TotalIdleJobs"); - Title = strcat(ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),"nogroup"), " Total Idle Jobs"); - Aggregate = "SUM"; - Value = IdleJobs; - Verbosity = 2; - Desc = strcat("Total number of idle jobs from ", ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "group \\1"),"no group")); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Accounting Groups"; -] -[ - Name = strcat(ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),"nogroup"),"-TotalHeldJobs"); - Title = strcat(ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),"nogroup"), " Total Held Jobs"); - Aggregate = "SUM"; - Value = HeldJobs; - Verbosity = 2; - Desc = strcat("Total number of held jobs from ", ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "group \\1"),"no group")); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Accounting Groups"; -] -[ - Name = strcat(ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),"nogroup"),"-TotalFlockedJobs"); - Title = strcat(ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),"nogroup"), " Total Flocked Jobs"); 
- Aggregate = "SUM"; - Value = FlockedJobs; - Verbosity = 2; - Desc = strcat("Total number of flocked jobs from ", ifThenElse(regexp("([a-zA-Z0-9.]+)\\.[a-zA-z0-9]+", splitUserName(Name)[0]),regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "group \\1"),"no group")); - Units = "jobs"; - TargetType = "Submitter"; - Group = "HTCondor Accounting Groups"; -] - - -[ - Name = "FileTransferDownloadBytes"; - Verbosity = 1; - Derivative = true; - Title = "File Transfer Download Bandwidth"; - Desc = "Output transfers from jobs"; - Units = "bytes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "FileTransferDownloadBytesPerSecond_5m"; - Verbosity = 1; - Desc = "Rate of output transfers from jobs"; - Units = "bytes/s"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Regex = "Owner_([^_]*)_FileTransferDownloadBytesPerSecond_5m"; - Title = "\\1 Download Bytes Per Second"; - Verbosity = 2; - Desc = "Rate of output transfers from jobs by user \\1"; - Units = "bytes/s"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "FileTransferFileReadLoad_5m"; - Verbosity = 1; - Desc = "Number of file transfer processes reading input data from files"; - Units = "processes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -/* This looks like a mismatch of stuff */ -[ - Name = "FileTransferFileReadSeconds"; - Verbosity = 99; - Derivative = true; - Title = "File Transfer File Read Load"; - Desc = "Number of file transfer processes reading input data from files"; - Units = "processes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "FileTransferFileWriteLoad_5m"; - Verbosity = 1; - Desc = "Number of file transfer processes writing output data to files"; - Units = "processes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -/* This looks like a mismatch of stuff */ -[ - Name = "FileTransferFileWriteSeconds"; - Verbosity = 99; - 
Derivative = true; - Title = "File Transfer File Write Load"; - Desc = "Number of file transfer processes writing output data to files"; - Units = "processes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "FileTransferNetReadLoad_5m"; - Verbosity = 1; - Desc = "Number of file transfer processes reading output data from the network"; - Units = "processes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -/* This looks like a mismatch of stuff */ -[ - Name = "FileTransferNetReadSeconds"; - Verbosity = 99; - Derivative = true; - Desc = "Number of file transfer processes reading output data from the network"; - Units = "processes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "FileTransferNetWriteLoad_5m"; - Verbosity = 1; - Desc = "Number of file transfer processes writing input data to the network"; - Units = "processes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -/* This looks like a mismatch of stuff */ -[ - Name = "FileTransferNetWriteSeconds"; - Verbosity = 99; - Derivative = true; - Title = "File Transfer Net Write Load"; - Desc = "Number of file transfer processes writing input data to the network"; - Units = "processes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "FileTransferUploadBytes"; - Derivative = true; - Title = "File Transfer Upload Bandwidth"; - Desc = "Input transfers to jobs"; - Units = "bytes"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "FileTransferUploadBytesPerSecond_5m"; - Verbosity = 1; - Desc = "Rate of input transfers to jobs"; - Units = "bytes/s"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Regex = "Owner_([^_]*)_FileTransferUploadBytesPerSecond_5m"; - Title = "\\1 Upload Bytes Per Second"; - Verbosity = 2; - Desc = "Rate of input transfers from jobs by user \\1"; - Units = "bytes/s"; - TargetType = "Scheduler"; - Group = "HTCondor 
File Transfer"; -] -[ - Name = "TransferQueueDownloadWaitTime"; - Desc = "Oldest output file transfer waiting in the transfer queue"; - Units = "seconds"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Aggregate = "MAX"; - Name = "Pool Max TransferQueueDownloadWaitTime"; - Value = TransferQueueDownloadWaitTime; - Desc = "Oldest output file transfer waiting in the transfer queues reporting to this pool"; - Units = "seconds"; - TargetType = "Scheduler"; -] -[ - Name = "TransferQueueNumDownloading"; - Desc = "Number of jobs actively transferring output"; - Units = "jobs"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "TransferQueueNumUploading"; - Desc = "Number of jobs actively transferring input"; - Units = "jobs"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "TransferQueueNumWaitingToDownload"; - Desc = "Number of jobs waiting in the transfer queue to transfer output"; - Units = "jobs"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "TransferQueueNumWaitingToUpload"; - Desc = "Number of jobs waiting in the transfer queue to transfer input"; - Units = "jobs"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Name = "TransferQueueUploadWaitTime"; - Desc = "Oldest input file transfer waiting in the transfer queue"; - Units = "seconds"; - TargetType = "Scheduler"; - Group = "HTCondor File Transfer"; -] -[ - Aggregate = "MAX"; - Name = "Pool Max TransferQueueUploadWaitTime"; - Value = TransferQueueUploadWaitTime; - Desc = "Oldest input file transfer waiting in the transfer queues reporting to this pool"; - Units = "seconds"; - TargetType = "Scheduler"; -] - -[ - Name = "LastNegotiationCycleActiveSubmitterCount0"; - Verbosity = 1; - Desc = "The number of job submitters considered in the negotiation cycle"; - Units = "submitters"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleCandidateSlots0"; - Verbosity = 2; 
- Desc = "The number of slot ClassAds considered for matchmaking (reduced by NEGOTIATOR_SLOT_POOLSIZE_CONSTRAINT if applicable)"; - Units = "slots"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleDuration0"; - Desc = "The number of seconds that it took to complete the negotiation cycle"; - Units = "seconds"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleMatches0"; - Verbosity = 1; - Desc = "The number of successful matches that were made in the negotiation cycle"; - Units = "matches"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleMatchRate0"; - Verbosity = 1; - Desc = "Matches made per second during negotiation cycle"; - Units = "matches/s"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleMatchRateSustained0"; - Verbosity = 1; - Desc = "Matches made per second, including waiting time between negotiation cycles"; - Units = "matches/s"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleNumIdleJobs0"; - Verbosity = 1; - Desc = "The number of idle jobs belonging to job submitters"; - Units = "jobs"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleNumJobsConsidered0"; - Verbosity = 1; - Desc = "The number of jobs considered for matchmaking (may be mutch lower than idle jobs due to auto-cluster optimizations)"; - Units = "jobs"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleNumSchedulers0"; - Verbosity = 2; - Desc = "The number of schedds involved in negotiation for resources"; - Units = "schedds"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCyclePeriod0"; - Verbosity = 1; - Desc = "Seconds between the end of one cycle the the end of the next"; - Units = "seconds"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCyclePhase1Duration0"; - Verbosity = 2; - Desc = "Duration of Phase 1: getting submitter and machine ClassAds"; - Units = "seconds"; - TargetType = "Negotiator"; -] -[ - Name = 
"LastNegotiationCyclePhase2Duration0"; - Verbosity = 2; - Desc = "Duration of Phase 2: filtering slots and processing accounting group configuration"; - Units = "seconds"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCyclePhase3Duration0"; - Verbosity = 2; - Desc = "Phase 3 of the negotiation cycle: sorting submitters by priority"; - Units = "seconds"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCyclePhase4Duration0"; - Verbosity = 2; - Desc = "Phase 4 of the negotiation cycle: matching slots to jobs"; - Units = "seconds"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleRejections0"; - Verbosity = 1; - Desc = "The number of rejections that occurred in the negotiation cycle (only one per auto-cluster)"; - Units = "jobs"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleSlotShareIter0"; - Verbosity = 2; - Desc = "The number of iterations in the negotiation cycle"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleTotalSlots0"; - Verbosity = 1; - Desc = "The total number of slot ClassAds that matched NEGOTIATOR_SLOT_CONSTRAINT"; - Units = "slots"; - TargetType = "Negotiator"; -] -[ - Name = "LastNegotiationCycleTrimmedSlots0"; - Verbosity = 2; - Desc = "The number of slot ClassAds considered for matchmaking, after filtering by Negotiator_CONSIDER_PREEMPTION, if applicable"; - Units = "slots"; - TargetType = "Negotiator"; -] - -[ - Name = "ExpectedMachineGracefulDrainingBadput"; - Verbosity = 2; - Desc = "Job runtime that would be lost if graceful draining were initiated now."; - Units = "cpus*seconds"; - TargetType = "Machine_slot1"; -] -[ - Name = "ExpectedMachineGracefulDrainingCompletion"; - Value = ExpectedMachineGracefulDrainingCompletion - time(); - Verbosity = 2; - Desc = "Time graceful draining could take to complete, assuming jobs take full retirement and vacate time and there is no suspension"; - Units = "seconds"; - TargetType = "Machine_slot1"; -] -[ - Name = 
"ExpectedMachineQuickDrainingBadput"; - Verbosity = 2; - Desc = "Job runtime that would be lost if quick draining were initiated now."; - Units = "cpus*seconds"; - TargetType = "Machine_slot1"; -] -[ - Name = "ExpectedMachineQuickDrainingCompletion"; - Verbosity = 2; - Desc = "Time quick draining could take to complete, assuming jobs take full retirement and vacate time and there is no suspension"; - Units = "seconds"; - TargetType = "Machine_slot1"; -] -[ - Name = "Linpack"; - Value = KFlops; - Verbosity = 2; - Desc = "Linpack floating point benchmark"; - Units = "FLOPS"; - Scale = 1000; - Type = "float"; - TargetType = "Machine_slot1"; -] -[ - Name = "Dhrystone"; - Value = Mips; - Verbosity = 2; - Desc = "Dhrystone integer benchmark"; - Units = "Iterations/sec"; - Scale = 1000000; - Type = "float"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalCondorLoadAvg"; - Verbosity = 1; - Desc = "The CPU load attributed to jobs"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalCpus"; - Verbosity = 2; - Desc = "Number of cores"; - Units = "cores"; - TargetType = "Machine_slot1"; -] -[ - Aggregate = "SUM"; - Name = "Cpus in Pool"; - Value = TotalCpus; - Verbosity = 2; - Desc = "Number of cores in the pool"; - Units = "cores"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalDisk"; - Verbosity = 2; - Desc = "Disk space in the job execute directory"; - Units = "bytes"; - Scale = 1024; - Type = "float"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalLoadAvg"; - Verbosity = 2; - Desc = "System load average"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalMemory"; - Verbosity = 2; - Desc = "RAM"; - Units = "bytes"; - Scale = 1048576; - Type = "float"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalSlots"; - Verbosity = 2; - Desc = "Number of slots"; - Units = "slots"; - TargetType = "Machine_slot1"; -] -[ - Aggregate = "SUM"; - Name = "Pool Slot Count"; - Value = TotalSlots; - Desc = "Number of slots in the pool"; - Units = "slots"; - TargetType = 
"Machine_slot1"; -] -[ - Name = "TotalMachineDrainingBadput"; - Verbosity = 1; - Desc = "Job runtime that has been lost due to job evictions caused by draining"; - Units = "cpus*seconds"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalMachineDrainingUnclaimedTime"; - Verbosity = 1; - Desc = "Time that has not been used due to draining"; - Units = "cpus*seconds"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalVirtualMemory"; - Verbosity = 2; - Desc = "Addressable memory (RAM plus swap)"; - Units = "bytes"; - Scale = 1024; - Type = "float"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalPreemptions"; - Verbosity = 2; - Desc = "Total number of preempted jobs on this startd"; - Units = "preemptions"; - TargetType = "Machine_slot1"; -] -[ - Name = "TotalJobStarts"; - Verbosity = 2; - Desc = "Total number of jobs started on this startd since boot"; - Units = "jobs"; - TargetType = "Machine_slot1"; -] -[ - Aggregate = "SUM"; - Name = "Poolwide Preemptions"; - Value = TotalPreemptions; - Verbosity = 2; - Desc = "Poolwide Preemptions"; - Units = "preemptions"; - TargetType = "Machine_slot1"; -] -[ - Aggregate = "SUM"; - Name = "Poolwide Job Starts"; - Value = TotalJobStarts; - Verbosity = 2; - Desc = "Poolwide Job Starts"; - Units = "jobs"; - TargetType = "Machine_slot1"; -] -[ - Name = "AutoClusters"; - Desc = "Number of active AutoClusters in the schedd"; - Units = "autoclusters"; - TargetType = "Scheduler"; -] -[ - Aggregate = "SUM"; - Name = "AutoClusters in Pool"; - Value = AutoClusters; - Desc = "Number of active AutoClusters in schedds reporting to this pool"; - Units = "autoclusters"; - TargetType = "Scheduler"; -] -[ - Name = strcat(MyType,"WholeMachines"); - Value = WholeMachines; - Verbosity = 2; - Desc = "Number of machines that were observed to be defragmented in the last polling interval"; - TargetType = "Defrag"; -] -[ - Name = strcat(MyType,"MachinesDraining"); - Value = MachinesDraining; - Verbosity = 2; - Desc = "Number of machines that 
were observed to be draining in the last polling interval"; - TargetType = "Defrag"; -] -[ - Name = strcat(MyType,"RecentDrainSuccesses"); - Value = RecentDrainSuccesses; - Verbosity = 2; - Desc = "Count of successful attempts to initiate draining during the past RecentStatsLifetime seconds"; - TargetType = "Defrag"; -] -[ - Name = strcat(MyType,"RecentDrainFailures"); - Value = RecentDrainFailures; - Verbosity = 2; - Desc = "Count of failed attempts to initiate draining during the past RecentStatsLifetime seconds"; - TargetType = "Defrag"; -] -[ - Name = strcat(MyType,"AvgDrainingUnclaimed"); - Value = AvgDrainingUnclaimed; - Verbosity = 2; - Desc = "Fraction of time CPUs in the pool have spent unclaimed by a user during draining of the machine"; - TargetType = "Defrag"; -] -[ - Name = strcat(MyType,"WholeMachinesPeak"); - Value = WholeMachinesPeak; - Verbosity = 2; - Desc = "Largest number of machines that were ever observed to be simultaneously defragmented"; - TargetType = "Defrag"; -] -[ - Name = strcat(MyType,"AvgDrainingBadput"); - Value = AvgDrainingBadput; - Verbosity = 2; - Desc = "Fraction of time CPUs in the pool have spent on jobs that were killed during draining of the machine"; - TargetType = "Defrag"; -] -[ - Name = strcat(MyType,"MachinesDrainingPeak"); - Value = MachinesDrainingPeak; - Verbosity = 2; - Desc = "Largest number of machines that were ever observed to be draining"; - TargetType = "Defrag"; -] diff --git a/roles/docker-htcondor/files/conf/exec/config.d/50-condor-exec b/roles/docker-htcondor/files/conf/exec/config.d/50-condor-exec deleted file mode 100644 index 354acf6..0000000 --- a/roles/docker-htcondor/files/conf/exec/config.d/50-condor-exec +++ /dev/null @@ -1 +0,0 @@ -use ROLE:execute diff --git a/roles/docker-htcondor/files/conf/sub/config.d/50-condor-submit b/roles/docker-htcondor/files/conf/sub/config.d/50-condor-submit deleted file mode 100644 index 76b79d6..0000000 --- 
a/roles/docker-htcondor/files/conf/sub/config.d/50-condor-submit +++ /dev/null @@ -1 +0,0 @@ -use ROLE:submit diff --git a/roles/docker-htcondor/files/htcondor.Dockerfile b/roles/docker-htcondor/files/htcondor.Dockerfile deleted file mode 100644 index 6f723cd..0000000 --- a/roles/docker-htcondor/files/htcondor.Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM docker.io/library/centos:7 - -RUN yum install -y https://research.cs.wisc.edu/htcondor/repo/8.9/htcondor-release-current.el7.noarch.rpm && \ - yum install --nogpgcheck -y condor && \ - yum install -y less && \ - yum clean all - -RUN yum install -y iproute bind-utils nmap-ncat net-tools && \ - yum clean all - -CMD bash -c 'cat <({ condor_master -f & tail --retry --pid $! -f /var/log/condor/MasterLog & })' diff --git a/roles/docker-htcondor/tasks/main.yml b/roles/docker-htcondor/tasks/main.yml deleted file mode 100644 index bbdeb72..0000000 --- a/roles/docker-htcondor/tasks/main.yml +++ /dev/null @@ -1,142 +0,0 @@ -- name: "htcondor docker image" - file: - path: "/container/docker-images/htcondor" - state: directory - owner: "{{unpriv_user}}" - group: docker - mode: "u=rwx,g=rwx,o=rx" - -- copy: - dest: "/container/docker-images/htcondor/Dockerfile" - src: "htcondor.Dockerfile" - owner: "{{unpriv_user}}" - group: docker - register: cp_dockerfile - -- docker_image: - name: "htcondor" -# pull: False - build: - pull: False - path: "/container/docker-images/htcondor" - source: build - force_source: "{{cp_dockerfile.changed}}" - -- name: "copy htcondor container configuration" - copy: - src: "conf/{{item}}/" - dest: "/container/volumes/{{item}}/" - owner: "{{unpriv_user}}" - group: docker - mode: "u=rwx,g=rwx" - with_items: [ "cm", "exec", "sub", "common"] - -- name: "check if pool shared secret exists" - stat: - path: "/container/volumes/common/passwords.d/POOL" - register: pool_pw - -- block: - - name: "create temporary password store" - tempfile: - state: directory - register: pool_pw_tmp - - - name: "generate pool 
password" - copy: - dest: "{{pool_pw_tmp.path}}/poolpw" - content: "{{lookup('password','/dev/null')}}" - no_log: True - - - name: "install pool password" - docker_container: - name: "condor-common" - image: htcondor - state: started - volumes: - - "/container/volumes/common/:/etc/condor/:rw" - - "{{pool_pw_tmp.path}}:/tmp/poolpw:ro" - detach: False - cleanup: True - command: "condor_store_cred add -c -i /tmp/poolpw/poolpw" - - - name: "remove tokens since pool password (cert) changed" - file: - path: "/container/volumes/{{item}}/tokens.d/condor@htc.local" - state: absent - with_items: [ "cm", "exec", "sub" ] - - always: - - name: "remove temporary password store" - file: - path: "{{pool_pw_tmp.path}}" - state: absent - when: pool_pw_tmp is defined and pool_pw_tmp.path - when: not pool_pw.stat.exists - -- name: "sync common files to individual containers" - copy: - remote_src: True - force: True - directory_mode: preserve - mode: preserve - src: "/container/volumes/common/" - dest: "/container/volumes/{{item}}/" - with_items: [ "cm", "exec", "sub"] - - -- name: "collect tokens to generate" - stat: - path: "/container/volumes/{{item}}/tokens.d/condor@htc.local" - with_items: [ "cm", "exec", "sub" ] - register: tokens_state - -- name: "generate tokens" - docker_container: - name: "condor-common" - image: htcondor - state: started - volumes: - - "/container/volumes/{{item}}/:/etc/condor/:rw" - detach: False - cleanup: True - command: "condor_token_create -identity condor@$(domainname) -token /etc/condor/tokens.d/condor@htc.local" - with_items: "{{tokens_state.results | rejectattr('stat.exists') | map(attribute='item') | list }}" - - -- name: "create docker network to make service discovery work" - docker_network: - name: condor - state: present - -# TODO: reserve some address using docker_network_info and assign as aux -# address to enable cm to get a static address in order to be reachable from -# htcondor running on docker host to enable submitting jobs. 
- -- name: "run htcondor containers" - docker_container: - name: "condor-{{item}}" - hostname: "condor-{{item}}" - domainname: "htc.local" - image: htcondor - state: started - detach: True - cleanup: True - networks_cli_compatible: True - networks: - - name: "condor" - aliases: [ "condor-{{item}}.htc.local" ] - volumes: - - "/container/volumes/{{item}}/:/etc/condor/:rw" - with_items: [ "cm", "exec", "sub"] -# auto_remove: True -# mounts: -# src: /container/volumes/cm/ -# dest: /etc/condor/ - - -#- add_host: -# hostname: foo -# ansible_connection: docker_api -# docker_host: ssh://ed-c7-1.virt.magni.thoto.net - diff --git a/roles/docker/tasks/main.yml b/roles/docker/tasks/main.yml index 5946f94..bb383b1 100644 --- a/roles/docker/tasks/main.yml +++ b/roles/docker/tasks/main.yml @@ -8,41 +8,6 @@ name: [ "docker-ce", "python-docker-py" ] # latter for ansible modules state: present -- name: "partition container image disk" - parted: - device: /dev/vdb - number: 1 - state: present -# fs_type: xfs - -- filesystem: - dev: /dev/vdb1 - fstype: xfs - opts: "-L image-store" - -- mount: - path: "/container" - src: "/dev/vdb1" - fstype: xfs - opts: "noatime" - state: mounted - -- file: - path: "/container/docker" - state: directory - owner: root - group: root - mode: "u=rwx,g=x,o=x" - -- name: "link docker configuration to new container partition" - file: - path: "/var/lib/docker" - src: "/container/docker" - state: link - owner: root - group: root - mode: "u=rwx,g=x,o=x" - - name: "enable docker service in systemd" service: name: docker diff --git a/roles/slurm/library/copy.py b/roles/slurm/library/copy.py deleted file mode 100644 index 3e80b6b..0000000 --- a/roles/slurm/library/copy.py +++ /dev/null @@ -1,793 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright: (c) 2012, Michael DeHaan -# Copyright: (c) 2017, Ansible Project -# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) - -from __future__ import absolute_import, 
division, print_function -__metaclass__ = type - -ANSIBLE_METADATA = {'metadata_version': '1.1', - 'status': ['stableinterface'], - 'supported_by': 'core'} - -DOCUMENTATION = r''' ---- -module: copy -version_added: historical -short_description: Copy files to remote locations -description: - - The C(copy) module copies a file from the local or remote machine to a location on the remote machine. - - Use the M(fetch) module to copy files from remote locations to the local box. - - If you need variable interpolation in copied files, use the M(template) module. Using a variable in the C(content) - field will result in unpredictable output. - - For Windows targets, use the M(win_copy) module instead. -options: - src: - description: - - Local path to a file to copy to the remote server. - - This can be absolute or relative. - - If path is a directory, it is copied recursively. In this case, if path ends - with "/", only inside contents of that directory are copied to destination. - Otherwise, if it does not end with "/", the directory itself with all contents - is copied. This behavior is similar to the C(rsync) command line tool. - type: path - content: - description: - - When used instead of C(src), sets the contents of a file directly to the specified value. - - Works only when C(dest) is a file. Creates the file if it does not exist. - - For advanced formatting or if C(content) contains a variable, use the M(template) module. - type: str - version_added: '1.1' - dest: - description: - - Remote absolute path where the file should be copied to. - - If C(src) is a directory, this must be a directory too. - - If C(dest) is a non-existent path and if either C(dest) ends with "/" or C(src) is a directory, C(dest) is created. - - If I(dest) is a relative path, the starting directory is determined by the remote host. - - If C(src) and C(dest) are files, the parent directory of C(dest) is not created and the task fails if it does not already exist. 
- type: path - required: yes - backup: - description: - - Create a backup file including the timestamp information so you can get the original file back if you somehow clobbered it incorrectly. - type: bool - default: no - version_added: '0.7' - force: - description: - - Influence whether the remote file must always be replaced. - - If C(yes), the remote file will be replaced when contents are different than the source. - - If C(no), the file will only be transferred if the destination does not exist. - - Alias C(thirsty) has been deprecated and will be removed in 2.13. - type: bool - default: yes - aliases: [ thirsty ] - version_added: '1.1' - mode: - description: - - The permissions of the destination file or directory. - - For those used to C(/usr/bin/chmod) remember that modes are actually octal numbers. - You must either add a leading zero so that Ansible's YAML parser knows it is an octal number - (like C(0644) or C(01777))or quote it (like C('644') or C('1777')) so Ansible receives a string - and can do its own conversion from string into number. Giving Ansible a number without following - one of these rules will end up with a decimal number which will have unexpected results. - - As of Ansible 1.8, the mode may be specified as a symbolic mode (for example, C(u+rwx) or C(u=rw,g=r,o=r)). - - As of Ansible 2.3, the mode may also be the special string C(preserve). - - C(preserve) means that the file will be given the same permissions as the source file. - type: path - directory_mode: - description: - - When doing a recursive copy set the mode for the directories. - - If this is not set we will use the system defaults. - - The mode is only set on directories which are newly created, and will not affect those that already existed. - type: raw - version_added: '1.5' - remote_src: - description: - - Influence whether C(src) needs to be transferred or already is present remotely. - - If C(no), it will search for C(src) at originating/master machine. 
- - If C(yes) it will go to the remote/target machine for the C(src). - - C(remote_src) supports recursive copying as of version 2.8. - - C(remote_src) only works with C(mode=preserve) as of version 2.6. - type: bool - default: no - version_added: '2.0' - follow: - description: - - This flag indicates that filesystem links in the destination, if they exist, should be followed. - type: bool - default: no - version_added: '1.8' - local_follow: - description: - - This flag indicates that filesystem links in the source tree, if they exist, should be followed. - type: bool - default: yes - version_added: '2.4' - checksum: - description: - - SHA1 checksum of the file being transferred. - - Used to validate that the copy of the file was successful. - - If this is not provided, ansible will use the local calculated checksum of the src file. - type: str - version_added: '2.5' -extends_documentation_fragment: -- decrypt -- files -- validate -notes: -- The M(copy) module recursively copy facility does not scale to lots (>hundreds) of files. 
-seealso: -- module: assemble -- module: fetch -- module: file -- module: synchronize -- module: template -- module: win_copy -author: -- Ansible Core Team -- Michael DeHaan -''' - -EXAMPLES = r''' -- name: Copy file with owner and permissions - copy: - src: /srv/myfiles/foo.conf - dest: /etc/foo.conf - owner: foo - group: foo - mode: '0644' - -- name: Copy file with owner and permission, using symbolic representation - copy: - src: /srv/myfiles/foo.conf - dest: /etc/foo.conf - owner: foo - group: foo - mode: u=rw,g=r,o=r - -- name: Another symbolic mode example, adding some permissions and removing others - copy: - src: /srv/myfiles/foo.conf - dest: /etc/foo.conf - owner: foo - group: foo - mode: u+rw,g-wx,o-rwx - -- name: Copy a new "ntp.conf file into place, backing up the original if it differs from the copied version - copy: - src: /mine/ntp.conf - dest: /etc/ntp.conf - owner: root - group: root - mode: '0644' - backup: yes - -- name: Copy a new "sudoers" file into place, after passing validation with visudo - copy: - src: /mine/sudoers - dest: /etc/sudoers - validate: /usr/sbin/visudo -csf %s - -- name: Copy a "sudoers" file on the remote machine for editing - copy: - src: /etc/sudoers - dest: /etc/sudoers.edit - remote_src: yes - validate: /usr/sbin/visudo -csf %s - -- name: Copy using inline content - copy: - content: '# This file was moved to /etc/other.conf' - dest: /etc/mine.conf - -- name: If follow=yes, /path/to/file will be overwritten by contents of foo.conf - copy: - src: /etc/foo.conf - dest: /path/to/link # link to /path/to/file - follow: yes - -- name: If follow=no, /path/to/link will become a file and be overwritten by contents of foo.conf - copy: - src: /etc/foo.conf - dest: /path/to/link # link to /path/to/file - follow: no -''' - -RETURN = r''' -dest: - description: Destination file/path - returned: success - type: str - sample: /path/to/file.txt -src: - description: Source file used for the copy on the target machine - returned: changed - 
type: str - sample: /home/httpd/.ansible/tmp/ansible-tmp-1423796390.97-147729857856000/source -md5sum: - description: MD5 checksum of the file after running copy - returned: when supported - type: str - sample: 2a5aeecc61dc98c4d780b14b330e3282 -checksum: - description: SHA1 checksum of the file after running copy - returned: success - type: str - sample: 6e642bb8dd5c2e027bf21dd923337cbb4214f827 -backup_file: - description: Name of backup file created - returned: changed and if backup=yes - type: str - sample: /path/to/file.txt.2015-02-12@22:09~ -gid: - description: Group id of the file, after execution - returned: success - type: int - sample: 100 -group: - description: Group of the file, after execution - returned: success - type: str - sample: httpd -owner: - description: Owner of the file, after execution - returned: success - type: str - sample: httpd -uid: - description: Owner id of the file, after execution - returned: success - type: int - sample: 100 -mode: - description: Permissions of the target, after execution - returned: success - type: str - sample: 0644 -size: - description: Size of the target, after execution - returned: success - type: int - sample: 1220 -state: - description: State of the target, after execution - returned: success - type: str - sample: file -''' - -import errno -import filecmp -import grp -import os -import os.path -import platform -import pwd -import shutil -import stat -import tempfile -import traceback - -from ansible.module_utils.basic import AnsibleModule -from ansible.module_utils.common.process import get_bin_path -from ansible.module_utils._text import to_bytes, to_native -from ansible.module_utils.six import PY3 - - -# The AnsibleModule object -module = None - - -class AnsibleModuleError(Exception): - def __init__(self, results): - self.results = results - - -# Once we get run_command moved into common, we can move this into a common/files module. We can't -# until then because of the module.run_command() method. 
We may need to move it into -# basic::AnsibleModule() until then but if so, make it a private function so that we don't have to -# keep it for backwards compatibility later. -def clear_facls(path): - setfacl = get_bin_path('setfacl', True) - # FIXME "setfacl -b" is available on Linux and FreeBSD. There is "setfacl -D e" on z/OS. Others? - acl_command = [setfacl, '-b', path] - b_acl_command = [to_bytes(x) for x in acl_command] - rc, out, err = module.run_command(b_acl_command, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C')) - if rc != 0: - raise RuntimeError('Error running "{0}": stdout: "{1}"; stderr: "{2}"'.format(' '.join(b_acl_command), out, err)) - - -def split_pre_existing_dir(dirname): - ''' - Return the first pre-existing directory and a list of the new directories that will be created. - ''' - head, tail = os.path.split(dirname) - b_head = to_bytes(head, errors='surrogate_or_strict') - if head == '': - return ('.', [tail]) - if not os.path.exists(b_head): - if head == '/': - raise AnsibleModuleError(results={'msg': "The '/' directory doesn't exist on this machine."}) - (pre_existing_dir, new_directory_list) = split_pre_existing_dir(head) - else: - return (head, [tail]) - new_directory_list.append(tail) - return (pre_existing_dir, new_directory_list) - - -def adjust_recursive_directory_permissions(pre_existing_dir, new_directory_list, module, directory_args, changed): - ''' - Walk the new directories list and make sure that permissions are as we would expect - ''' - - if new_directory_list: - working_dir = os.path.join(pre_existing_dir, new_directory_list.pop(0)) - directory_args['path'] = working_dir - changed = module.set_fs_attributes_if_different(directory_args, changed) - changed = adjust_recursive_directory_permissions(working_dir, new_directory_list, module, directory_args, changed) - return changed - - -def chown_recursive(path, module): - changed = False - owner = module.params['owner'] - group = module.params['group'] - - if owner is 
not None: - if not module.check_mode: - for dirpath, dirnames, filenames in os.walk(path): - owner_changed = module.set_owner_if_different(dirpath, owner, False) - if owner_changed is True: - changed = owner_changed - for dir in [os.path.join(dirpath, d) for d in dirnames]: - owner_changed = module.set_owner_if_different(dir, owner, False) - if owner_changed is True: - changed = owner_changed - for file in [os.path.join(dirpath, f) for f in filenames]: - owner_changed = module.set_owner_if_different(file, owner, False) - if owner_changed is True: - changed = owner_changed - else: - uid = pwd.getpwnam(owner).pw_uid - for dirpath, dirnames, filenames in os.walk(path): - owner_changed = (os.stat(dirpath).st_uid != uid) - if owner_changed is True: - changed = owner_changed - for dir in [os.path.join(dirpath, d) for d in dirnames]: - owner_changed = (os.stat(dir).st_uid != uid) - if owner_changed is True: - changed = owner_changed - for file in [os.path.join(dirpath, f) for f in filenames]: - owner_changed = (os.stat(file).st_uid != uid) - if owner_changed is True: - changed = owner_changed - if group is not None: - if not module.check_mode: - for dirpath, dirnames, filenames in os.walk(path): - group_changed = module.set_group_if_different(dirpath, group, False) - if group_changed is True: - changed = group_changed - for dir in [os.path.join(dirpath, d) for d in dirnames]: - group_changed = module.set_group_if_different(dir, group, False) - if group_changed is True: - changed = group_changed - for file in [os.path.join(dirpath, f) for f in filenames]: - group_changed = module.set_group_if_different(file, group, False) - if group_changed is True: - changed = group_changed - else: - gid = grp.getgrnam(group).gr_gid - for dirpath, dirnames, filenames in os.walk(path): - group_changed = (os.stat(dirpath).st_gid != gid) - if group_changed is True: - changed = group_changed - for dir in [os.path.join(dirpath, d) for d in dirnames]: - group_changed = (os.stat(dir).st_gid != 
gid) - if group_changed is True: - changed = group_changed - for file in [os.path.join(dirpath, f) for f in filenames]: - group_changed = (os.stat(file).st_gid != gid) - if group_changed is True: - changed = group_changed - - return changed - - -def copy_diff_files(src, dest, module): - changed = False - owner = module.params['owner'] - group = module.params['group'] - local_follow = module.params['local_follow'] - diff_files = filecmp.dircmp(src, dest).diff_files - if len(diff_files): - changed = True - if not module.check_mode: - for item in diff_files: - src_item_path = os.path.join(src, item) - dest_item_path = os.path.join(dest, item) - b_src_item_path = to_bytes(src_item_path, errors='surrogate_or_strict') - b_dest_item_path = to_bytes(dest_item_path, errors='surrogate_or_strict') - if os.path.islink(b_src_item_path) and local_follow is False: - linkto = os.readlink(b_src_item_path) - os.symlink(linkto, b_dest_item_path) - else: - shutil.copyfile(b_src_item_path, b_dest_item_path) - shutil.copymode(b_src_item_path, b_dest_item_path) - - if owner is not None: - module.set_owner_if_different(b_dest_item_path, owner, False) - if group is not None: - module.set_group_if_different(b_dest_item_path, group, False) - changed = True - return changed - - -def copy_left_only(src, dest, module): - changed = False - owner = module.params['owner'] - group = module.params['group'] - local_follow = module.params['local_follow'] - left_only = filecmp.dircmp(src, dest).left_only - if len(left_only): - changed = True - if not module.check_mode: - for item in left_only: - src_item_path = os.path.join(src, item) - dest_item_path = os.path.join(dest, item) - b_src_item_path = to_bytes(src_item_path, errors='surrogate_or_strict') - b_dest_item_path = to_bytes(dest_item_path, errors='surrogate_or_strict') - - if os.path.islink(b_src_item_path) and os.path.isdir(b_src_item_path) and local_follow is True: - shutil.copytree(b_src_item_path, b_dest_item_path, symlinks=not(local_follow)) 
- chown_recursive(b_dest_item_path, module) - - if os.path.islink(b_src_item_path) and os.path.isdir(b_src_item_path) and local_follow is False: - linkto = os.readlink(b_src_item_path) - os.symlink(linkto, b_dest_item_path) - - if os.path.islink(b_src_item_path) and os.path.isfile(b_src_item_path) and local_follow is True: - shutil.copyfile(b_src_item_path, b_dest_item_path) - if owner is not None: - module.set_owner_if_different(b_dest_item_path, owner, False) - if group is not None: - module.set_group_if_different(b_dest_item_path, group, False) - - if os.path.islink(b_src_item_path) and os.path.isfile(b_src_item_path) and local_follow is False: - linkto = os.readlink(b_src_item_path) - os.symlink(linkto, b_dest_item_path) - - if not os.path.islink(b_src_item_path) and os.path.isfile(b_src_item_path): - shutil.copyfile(b_src_item_path, b_dest_item_path) - shutil.copymode(b_src_item_path, b_dest_item_path) - if owner is not None: - module.set_owner_if_different(b_dest_item_path, owner, False) - if group is not None: - module.set_group_if_different(b_dest_item_path, group, False) - - if not os.path.islink(b_src_item_path) and os.path.isdir(b_src_item_path): - shutil.copytree(b_src_item_path, b_dest_item_path, symlinks=not(local_follow)) - chown_recursive(b_dest_item_path, module) - - changed = True - return changed - - -def copy_common_dirs(src, dest, module): - changed = False - common_dirs = filecmp.dircmp(src, dest).common_dirs - for item in common_dirs: - src_item_path = os.path.join(src, item) - dest_item_path = os.path.join(dest, item) - b_src_item_path = to_bytes(src_item_path, errors='surrogate_or_strict') - b_dest_item_path = to_bytes(dest_item_path, errors='surrogate_or_strict') - diff_files_changed = copy_diff_files(b_src_item_path, b_dest_item_path, module) - left_only_changed = copy_left_only(b_src_item_path, b_dest_item_path, module) - if diff_files_changed or left_only_changed: - changed = True - - # recurse into subdirectory - changed = changed or 
copy_common_dirs(os.path.join(src, item), os.path.join(dest, item), module) - return changed - - -def main(): - - global module - - module = AnsibleModule( - # not checking because of daisy chain to file module - argument_spec=dict( - src=dict(type='path'), - _original_basename=dict(type='str'), # used to handle 'dest is a directory' via template, a slight hack - content=dict(type='str', no_log=True), - dest=dict(type='path', required=True), - backup=dict(type='bool', default=False), - force=dict(type='bool', default=True, aliases=['thirsty']), - validate=dict(type='str'), - directory_mode=dict(type='raw'), - remote_src=dict(type='bool'), - local_follow=dict(type='bool'), - checksum=dict(type='str'), - ), - add_file_common_args=True, - supports_check_mode=True, - ) - - if module.params.get('thirsty'): - module.deprecate('The alias "thirsty" has been deprecated and will be removed, use "force" instead', version='2.13') - - src = module.params['src'] - b_src = to_bytes(src, errors='surrogate_or_strict') - dest = module.params['dest'] - # Make sure we always have a directory component for later processing - if os.path.sep not in dest: - dest = '.{0}{1}'.format(os.path.sep, dest) - b_dest = to_bytes(dest, errors='surrogate_or_strict') - backup = module.params['backup'] - force = module.params['force'] - _original_basename = module.params.get('_original_basename', None) - validate = module.params.get('validate', None) - follow = module.params['follow'] - local_follow = module.params['local_follow'] - mode = module.params['mode'] - owner = module.params['owner'] - group = module.params['group'] - remote_src = module.params['remote_src'] - checksum = module.params['checksum'] - - if not os.path.exists(b_src): - module.fail_json(msg="Source %s not found" % (src)) - if not os.access(b_src, os.R_OK): - module.fail_json(msg="Source %s not readable" % (src)) - - # Preserve is usually handled in the action plugin but mode + remote_src has to be done on the - # remote host - if 
module.params['mode'] == 'preserve': - module.params['mode'] = '0%03o' % stat.S_IMODE(os.stat(b_src).st_mode) - mode = module.params['mode'] - - checksum_dest = None - - if os.path.isfile(src): - checksum_src = module.sha1(src) - else: - checksum_src = None - - # Backwards compat only. This will be None in FIPS mode - try: - if os.path.isfile(src): - md5sum_src = module.md5(src) - else: - md5sum_src = None - except ValueError: - md5sum_src = None - - changed = False - - if checksum and checksum_src != checksum: - module.fail_json( - msg='Copied file does not match the expected checksum. Transfer failed.', - checksum=checksum_src, - expected_checksum=checksum - ) - - # Special handling for recursive copy - create intermediate dirs - if dest.endswith(os.sep): - if _original_basename: - dest = os.path.join(dest, _original_basename) - b_dest = to_bytes(dest, errors='surrogate_or_strict') - dirname = os.path.dirname(dest) - b_dirname = to_bytes(dirname, errors='surrogate_or_strict') - if not os.path.exists(b_dirname): - try: - (pre_existing_dir, new_directory_list) = split_pre_existing_dir(dirname) - except AnsibleModuleError as e: - e.result['msg'] += ' Could not copy to {0}'.format(dest) - module.fail_json(**e.results) - - os.makedirs(b_dirname) - directory_args = module.load_file_common_arguments(module.params) - directory_mode = module.params["directory_mode"] - if directory_mode is not None: - directory_args['mode'] = directory_mode - else: - directory_args['mode'] = None - adjust_recursive_directory_permissions(pre_existing_dir, new_directory_list, module, directory_args, changed) - - if os.path.isdir(b_dest): - basename = os.path.basename(src) - if _original_basename: - basename = _original_basename - dest = os.path.join(dest, basename) - b_dest = to_bytes(dest, errors='surrogate_or_strict') - - if os.path.exists(b_dest): - if os.path.islink(b_dest) and follow: - b_dest = os.path.realpath(b_dest) - dest = to_native(b_dest, errors='surrogate_or_strict') - if not 
force: - module.exit_json(msg="file already exists", src=src, dest=dest, changed=False) - if os.access(b_dest, os.R_OK) and os.path.isfile(b_dest): - checksum_dest = module.sha1(dest) - else: - if not os.path.exists(os.path.dirname(b_dest)): - try: - # os.path.exists() can return false in some - # circumstances where the directory does not have - # the execute bit for the current user set, in - # which case the stat() call will raise an OSError - os.stat(os.path.dirname(b_dest)) - except OSError as e: - if "permission denied" in to_native(e).lower(): - module.fail_json(msg="Destination directory %s is not accessible" % (os.path.dirname(dest))) - module.fail_json(msg="Destination directory %s does not exist" % (os.path.dirname(dest))) - - if not os.access(os.path.dirname(b_dest), os.W_OK) and not module.params['unsafe_writes']: - module.fail_json(msg="Destination %s not writable" % (os.path.dirname(dest))) - - backup_file = None - if checksum_src != checksum_dest or os.path.islink(b_dest): - if not module.check_mode: - try: - if backup: - if os.path.exists(b_dest): - backup_file = module.backup_local(dest) - # allow for conversion from symlink. 
- if os.path.islink(b_dest): - os.unlink(b_dest) - open(b_dest, 'w').close() - if validate: - # if we have a mode, make sure we set it on the temporary - # file source as some validations may require it - if mode is not None: - module.set_mode_if_different(src, mode, False) - if owner is not None: - module.set_owner_if_different(src, owner, False) - if group is not None: - module.set_group_if_different(src, group, False) - if "%s" not in validate: - module.fail_json(msg="validate must contain %%s: %s" % (validate)) - (rc, out, err) = module.run_command(validate % src) - if rc != 0: - module.fail_json(msg="failed to validate", exit_status=rc, stdout=out, stderr=err) - b_mysrc = b_src - if remote_src and os.path.isfile(b_src): - _, b_mysrc = tempfile.mkstemp(dir=os.path.dirname(b_dest)) - - shutil.copyfile(b_src, b_mysrc) - try: - shutil.copystat(b_src, b_mysrc) - except OSError as err: - if err.errno == errno.ENOSYS and mode == "preserve": - module.warn("Unable to copy stats {0}".format(to_native(b_src))) - else: - raise - - # might be needed below - if PY3 and hasattr(os, 'listxattr'): - try: - src_has_acls = 'system.posix_acl_access' in os.listxattr(src) - except Exception as e: - # assume unwanted ACLs by default - src_has_acls = True - - module.atomic_move(b_mysrc, dest, unsafe_writes=module.params['unsafe_writes']) - - if PY3 and hasattr(os, 'listxattr') and platform.system() == 'Linux' and not remote_src: - # atomic_move used above to copy src into dest might, in some cases, - # use shutil.copy2 which in turn uses shutil.copystat. - # Since Python 3.3, shutil.copystat copies file extended attributes: - # https://docs.python.org/3/library/shutil.html#shutil.copystat - # os.listxattr (along with others) was added to handle the operation. 
- - # This means that on Python 3 we are copying the extended attributes which includes - # the ACLs on some systems - further limited to Linux as the documentation above claims - # that the extended attributes are copied only on Linux. Also, os.listxattr is only - # available on Linux. - - # If not remote_src, then the file was copied from the controller. In that - # case, any filesystem ACLs are artifacts of the copy rather than preservation - # of existing attributes. Get rid of them: - - if src_has_acls: - # FIXME If dest has any default ACLs, there are not applied to src now because - # they were overridden by copystat. Should/can we do anything about this? - # 'system.posix_acl_default' in os.listxattr(os.path.dirname(b_dest)) - - try: - clear_facls(dest) - except ValueError as e: - if 'setfacl' in to_native(e): - # No setfacl so we're okay. The controller couldn't have set a facl - # without the setfacl command - pass - else: - raise - except RuntimeError as e: - # setfacl failed. - if 'Operation not supported' in to_native(e): - # The file system does not support ACLs. 
- pass - else: - raise - - except (IOError, OSError): - module.fail_json(msg="failed to copy: %s to %s" % (src, dest), traceback=traceback.format_exc()) - changed = True - else: - changed = False - - if checksum_src is None and checksum_dest is None: - if remote_src and os.path.isdir(module.params['src']): - b_src = to_bytes(module.params['src'], errors='surrogate_or_strict') - b_dest = to_bytes(module.params['dest'], errors='surrogate_or_strict') - - if src.endswith(os.path.sep) and os.path.isdir(module.params['dest']): - diff_files_changed = copy_diff_files(b_src, b_dest, module) - left_only_changed = copy_left_only(b_src, b_dest, module) - common_dirs_changed = copy_common_dirs(b_src, b_dest, module) - owner_group_changed = chown_recursive(b_dest, module) - if diff_files_changed or left_only_changed or common_dirs_changed or owner_group_changed: - changed = True - - if src.endswith(os.path.sep) and not os.path.exists(module.params['dest']): - b_basename = to_bytes(os.path.basename(src), errors='surrogate_or_strict') - b_dest = to_bytes(os.path.join(b_dest, b_basename), errors='surrogate_or_strict') - b_src = to_bytes(os.path.join(module.params['src'], ""), errors='surrogate_or_strict') - if not module.check_mode: - shutil.copytree(b_src, b_dest, symlinks=not(local_follow)) - chown_recursive(dest, module) - changed = True - - if not src.endswith(os.path.sep) and os.path.isdir(module.params['dest']): - b_basename = to_bytes(os.path.basename(src), errors='surrogate_or_strict') - b_dest = to_bytes(os.path.join(b_dest, b_basename), errors='surrogate_or_strict') - b_src = to_bytes(os.path.join(module.params['src'], ""), errors='surrogate_or_strict') - if not module.check_mode and not os.path.exists(b_dest): - shutil.copytree(b_src, b_dest, symlinks=not(local_follow)) - changed = True - chown_recursive(dest, module) - if module.check_mode and not os.path.exists(b_dest): - changed = True - if os.path.exists(b_dest): - diff_files_changed = copy_diff_files(b_src, b_dest, 
module) - left_only_changed = copy_left_only(b_src, b_dest, module) - common_dirs_changed = copy_common_dirs(b_src, b_dest, module) - owner_group_changed = chown_recursive(b_dest, module) - if diff_files_changed or left_only_changed or common_dirs_changed or owner_group_changed: - changed = True - - if not src.endswith(os.path.sep) and not os.path.exists(module.params['dest']): - b_basename = to_bytes(os.path.basename(module.params['src']), errors='surrogate_or_strict') - b_dest = to_bytes(os.path.join(b_dest, b_basename), errors='surrogate_or_strict') - if not module.check_mode and not os.path.exists(b_dest): - os.makedirs(b_dest) - b_src = to_bytes(os.path.join(module.params['src'], ""), errors='surrogate_or_strict') - diff_files_changed = copy_diff_files(b_src, b_dest, module) - left_only_changed = copy_left_only(b_src, b_dest, module) - common_dirs_changed = copy_common_dirs(b_src, b_dest, module) - owner_group_changed = chown_recursive(b_dest, module) - if diff_files_changed or left_only_changed or common_dirs_changed or owner_group_changed: - changed = True - if module.check_mode and not os.path.exists(b_dest): - changed = True - - res_args = dict( - dest=dest, src=src, md5sum=md5sum_src, checksum=checksum_src, changed=changed - ) - if backup_file: - res_args['backup_file'] = backup_file - - module.params['dest'] = dest - if not module.check_mode: - file_args = module.load_file_common_arguments(module.params) - res_args['changed'] = module.set_fs_attributes_if_different(file_args, res_args['changed']) - - module.exit_json(**res_args) - - -if __name__ == '__main__': - main() diff --git a/roles/slurm/tasks/dockerimage.yml b/roles/slurm/tasks/dockerimage.yml index cf73759..77e4209 100644 --- a/roles/slurm/tasks/dockerimage.yml +++ b/roles/slurm/tasks/dockerimage.yml @@ -1,19 +1,19 @@ - file: - path: "/container/docker-images/{{item}}" + path: "/home/centos7/docker-images/{{item}}" state: directory owner: "{{unpriv_user}}" group: docker - copy: src: 
"{{item}}.Dockerfile" - dest: "/container/docker-images/{{item}}/Dockerfile" + dest: "/home/centos7/docker-images/{{item}}/Dockerfile" owner: "{{unpriv_user}}" group: docker register: slurm_cp_dockerfile - copy: src: "entrypoint.sh" - dest: "/container/docker-images/{{item}}/entrypoint.sh" + dest: "/home/centos7/docker-images/{{item}}/entrypoint.sh" owner: root group: root mode: u=rwx,g=rx,o=rx @@ -24,7 +24,7 @@ # pull: False build: pull: False - path: "/container/docker-images/{{item}}" + path: "/home/centos7/docker-images/{{item}}" # target: "{{item}}" # unsupported on old docker-py versions as in el7 source: build force_source: "{{slurm_cp_dockerfile.changed or slurm_cp_entrypt.changed}}" diff --git a/roles/slurm/tasks/main.yml b/roles/slurm/tasks/main.yml index 8ee8ff0..7416cc3 100644 --- a/roles/slurm/tasks/main.yml +++ b/roles/slurm/tasks/main.yml @@ -21,7 +21,7 @@ mode: u=rw,g=,o= - file: - path: /container/volumes/munge + path: /home/centos7/volumes/munge state: directory owner: munge group: munge @@ -33,17 +33,17 @@ force: true mode: preserve src: /etc/munge/munge.key - dest: /container/volumes/munge/munge.key + dest: /home/centos7/volumes/munge/munge.key - file: - path: /container/volumes/slurm/ + path: /home/centos7/volumes/slurm/ state: directory - name: upload slurm config template: force: true src: "{{item}}.j2" - dest: "/container/volumes/slurm/{{item}}" + dest: "/home/centos7/volumes/slurm/{{item}}" loop: - slurm.conf - cgroup.conf @@ -90,8 +90,8 @@ networks_cli_compatible: True vars: default_mounts: - - /container/volumes/slurm/:/etc/slurm/:rw - - /container/volumes/munge/munge.key:/etc/munge/munge.key:rw + - /home/centos7/volumes/slurm/:/etc/slurm/:rw + - /home/centos7/volumes/munge/munge.key:/etc/munge/munge.key:rw - slurm-shared:/shared/:rw slurm_nodes_all: | # add execute nodes {% for i in range(1, 4) -%} diff --git a/roles/docker-htcondor/library/copy.py b/roles/slurm/testlib/copy.py similarity index 100% rename from 
roles/docker-htcondor/library/copy.py rename to roles/slurm/testlib/copy.py