#! /bin/bash -l
echo "loading gleam-x profile"
# Any system modules should be loaded here. Aside from singularity and slurm, no additional
# modules are expected to be needed
module load singularity
# Before running obs_*.sh scripts ensure the completed configuration file has been sourced.
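# For example (illustrative only; the path and task arguments below are placeholders):
#   source /path/to/your-completed.profile
#   obs_manta.sh ...        # or any other obs_*.sh task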
# As a convention when specifying paths below, please ensure that they do not end with a trailing '/', as
# this assists in readability when combining paths.
# Basic configuration
cluster="garrawarla" # System-wide name of cluster, e.g. "garrawarla". This should match the name of the cluster, which can be checked
# with 'scontrol show config' under ClusterName, or under the environment variable HOST_CLUSTER
# This is used when submitting jobs 'sbatch -M ${GXCLUSTER}'. See GXCLUSTER below.
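# For example (illustrative), the value SLURM reports for this system can be checked with:
#   scontrol show config | grep -i ClusterName
# or by inspecting the HOST_CLUSTER environment variable:
#   echo "${HOST_CLUSTER}"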
export GXUSER=$(whoami) # User name of the operator running the pipeline. This is here to generate user-specific filenames and paths.
# It is recommended to use the login account name, although in principle this is not critical and other approaches
# could be adopted
export GXVERSION='3.1.0' # Version number of the pipeline. This should not be changed. Currently it is defined here but not used.
export GXACCOUNT= # The SLURM account jobs will be run under, e.g. 'pawsey0272'. If left empty, no corresponding
# --account=${GXACCOUNT} option is passed to the slurm submission. Only relevant if SLURM is tracking time usage
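# A minimal sketch (not the pipeline's exact code) of how an empty GXACCOUNT is handled at submission time:
#   account_option=""
#   [[ -n "${GXACCOUNT}" ]] && account_option="--account=${GXACCOUNT}"
#   sbatch ${account_option} task.sh        # 'task.sh' is a placeholder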
export GXBASE="/not/actual/path" # Path to base of GLEAM-X Pipeline where the repository was 'git clone' into including the name of the repository foldername, e.g. "/astro/mwasci/tgalvin/GLEAM-X-pipeline"
export GXSCRATCH="/scratch" # Path to your scratch space used for processing on the HPC environment, e.g. /scratch
# Within pawsey this is /astro/mwas/${GXUSER}
export GXHOME="${GXSCRATCH}" # HOME space for some tasks. In some system configurations singularity can not mount $HOME, but applications (e.g. CASA, python) would like
# one to be available for cache folders. This does not have to be an actual $HOME directory, just a folder with read and write access.
# Suggestion is the same path as the scratch space, e.g. $GXSCRATCH. Although if the HPC is configured correctly it could be set to HOME.
# This variable is not used in any tasks -- but it is used in the creation of the SINGULARITY_BINDPATH variable.
export GXCONTAINER="${GXSCRATCH}/gleamx.img" # Absolute path to the GLEAM-X singularity container, including the file name, e.g. "${GXSCRATCH}/gleamx.img"
# This container is still being evaluated and is available on request from Tim Galvin. In a future update
# the container will be automatically downloaded alongside other data dependencies.
# SLURM compute scheduler information
export GXCOMPUTER=${cluster} # Maintained for compatibility. Describes the name of the cluster, e.g. "magnus".
export GXCLUSTER=${cluster} # Describes the name of the cluster, e.g. "magnus". This is used when submitting tasks 'sbatch -M ${GXCLUSTER}', and in track_task.py as a
# component of the composite key. In the future the '-M' machine option will likely be removed from all but obs_manta.sh.
export GXSTANDARDQ="workq" # Slurm queue to submit tasks to, e.g. "workq". Available queues can be inspected using 'sinfo' on a system where the slurm scheduler is available
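# For example:
#   sinfo --summarize       # lists the partitions (queues) available on this cluster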
# Compute node memory specification
export GXABSMEMORY=60 # Absolute memory a machine should be considered to have in GB, e.g. 60. This value is submitted to slurm via "--mem=${GXABSMEMORY}"
# For tasks that require only a small memory allocation, this option is ignored and a hard-coded value of "24G" is used. This is done
# to assist in quicker resource allocation
export GXMEMORY=50 # Typical memory a program should use in GB, e.g. 50. This is used for tasks like 'calibrate' and 'wsclean' to attempt to limit
# its usage to fit within the memory allocation alongside other overheads. It is recommended that this be ~10G smaller than
# GXABSMEMORY, although there is no technical reason it could be set otherwise.
# Compute node CPU specification
export GXNCPUS=48 # Number of CPUs of each machine, e.g. 48. For tasks that have a 'core' like option this value is passed.
export GXNLCPUS=48 # Number of logical CPUs on each machine, e.g. 48. This option may be phased out.
export GXNPCPUS=24 # Number of physical CPUs on each machine, e.g. 24. This is meant to count only the physical cores available.
export GXNCPULINE="--ntasks-per-node=${GXNPCPUS}" # Informs the SLURM request how many CPUs should be allocated, e.g. "--ntasks-per-node=${GXNPCPUS}"
# If unset the SLURM default will be used. For tasks that are not parallelisable (apply_cal, uvflag),
# this option will be overwritten (if it is set) to ensure a single core is used.
# There may be some interaction between this line and $GXNCPUS when deployed. For instance, on magnus only a max of 24 cores
# can be requested, but there are 48 available in the job (logical hyper-threaded cores ignored in resource request).
# For SLURM environments that do not share a node among users (entire node is booked for a request), it is suggested that
# this option is left empty.
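# For example, on a SLURM set up where an entire node is booked per job it may be preferable to
# accept the SLURM default by leaving this empty:
#   export GXNCPULINE=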
# SLURM job submission details
export GXTASKLINE= # Reserved space for additional slurm sbatch options, if needed. This is passed to all SLURM sbatch calls.
export GXLOG="${GXBASE}/log_${GXCLUSTER}" # Path to output task logs, e.g. "${GXBASE}/log_${GXCLUSTER}". It is recommended that this is cluster specific.
export GXSCRIPT="${GXBASE}/script_${GXCLUSTER}" # Path to place generated template scripts. e.g. "${GXBASE}/script_${GXCLUSTER}". It is recommended that this is cluster specific.
export GXTRACK='no-track' # Directive to inform task tracking for meta-database. 'track' will track task progression. Anything else will disable tracking.
export GXSSH="${GXBASE}/ssh_keys/gx_${GXUSER}" # Path to SSH private key to be used for archiving. If you direct it to a newly generated key-pair,
if [ ! -z "${GXSSH}" ] && [ ! -f "${GXSSH}" ] # ensure restricted folder/file permissions, e.g. chmod -R 700 "${GXBASE}/ssh_keys"
then # Keys can be generated with: ssh-keygen -t rsa -f "${GXBASE}/ssh_keys/gx_${GXUSER}"
echo "GXSSH set to ${GXSSH}, but not found. Setting to empty." # This is used only in the archiving script, as on Magnus it appears singularity can not bind to $HOME correctly.
export GXSSH="" # The normal ssh key in a users home directory can be used as well, e.g. ${HOME}/.ssh/id_rsa
fi
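# For example (a sketch based on the comments above; run once outside of this profile), a dedicated
# archiving key-pair with restricted permissions could be created with:
#   mkdir -p "${GXBASE}/ssh_keys"
#   ssh-keygen -t rsa -f "${GXBASE}/ssh_keys/gx_${GXUSER}"
#   chmod -R 700 "${GXBASE}/ssh_keys"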
# Data dependencies
# Data dependencies are downloaded into the directories below if the directories do not exist.
export GXMWAPB="${GXBASE}/data/mwa_pb" # The calibrate program requires the FEE model of the MWA primary beam.
# This describes the path that contains the file mwa_full_embedded_element_pattern.h5
# and can be downloaded from http://cerberus.mwa128t.org/mwa_full_embedded_element_pattern.h5
# If this folder does not exist, it is created.
export GXMWALOOKUP="${GXBASE}/data/pb" # The path to the folder containing the MWA PB lookup HDF5 files used by lookup_beam.py and lookup_jones.py.
# If this folder does not exist, it is created.
# Details for obs_manta
export GXCOPYA= # Account to submit obs_manta.sh job under, if time accounting is being performed by SLURM.
# Leave this empty if the job is to be submitted as the user and there is no time accounting.
export GXCOPYQ='copyq' # A required parameter directing the job to a particular queue on $GXCOPYM. Set as just the queue name, e.g. 'copyq'
export GXCOPYM='zeus' # A required parameter directing the job to be submitted to a particular machine. Set as just the machine name, e.g. 'zeus'
# Staging area
export GXSTAGE= # To support the polarisation effort led by Xiang Zhang and George Heald at CSIRO, calibrated measurement sets
# will be placed into a staging area so that they may be remotely copied. Due to the CSIRO network security
# set up, connections have to be initiated from their internal network, meaning the GLEAM-X pipeline will not
# be able to initiate the transfer. If you are *NOT* involved with the processing effort for all of the G0008
# data then you may ignore this. If you *ARE* involved, please reach out to a GLEAM-X member to ensure this
# is correctly configured and known on the CSIRO side.
# Singularity bind paths
# This describes a set of paths that need to be available within the container for all processing tasks. Depending on the system
# and pipeline configuration it is best to have these explicitly set across all tasks. For each 'singularity run' command this
# SINGULARITY_BINDPATH will be used to mount against. These GX variables should be all that is needed on a typical deployed
# pipeline, but can be used to further expose/enhance functionality if desired.
export SINGULARITY_BINDPATH="${GXHOME}:${HOME},${GXSCRIPT},${GXBASE},${GXSCRATCH},${GXSSH},${GXMWALOOKUP}:/pb_lookup,${GXMWAPB},${GXSTAGE}"
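# For example (illustrative), with this bind path exported a tool inside the container can be run as:
#   singularity exec "${GXCONTAINER}" wsclean --version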
export PATH="${PATH}:${GXBASE}/bin" # Adds the obs_* script to the searchable path.
export HOST_CLUSTER=${GXCLUSTER} # Maintained for compatibility. Will be removed soon.
# Force matplotlib to write configuration to a location with write access. Attempting to fix issues on pawsey
export MPLCONFIGDIR="${GXSCRATCH}"
#Redirect astropy cache and home location. Attempting to fix issue on pawsey.
export XDG_CONFIG_HOME="${GXSCRATCH}"
export XDG_CACHE_HOME="${GXSCRATCH}"
# Loads a file that contains secrets used throughout the pipeline. These include
# - MWA_ASVO_API_KEY
# - GXDBHOST
# - GXDBPORT
# - GXDBUSER
# - GXDBPASS
# This GXSECRETS file is expected to export each of the variables above.
# This file SHOULD NOT be git tracked!
GXSECRETS=
if [[ -f ${GXSECRETS} ]]
then
source "${GXSECRETS}"
fi
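# For example, the secrets file might contain (placeholder values only):
#   export MWA_ASVO_API_KEY='xxxxxxxx'
#   export GXDBHOST='database.host.name'
#   export GXDBPORT='1234'
#   export GXDBUSER='gleamx_user'
#   export GXDBPASS='not-a-real-password'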
# Check that required variables have a value. This performs a simple 'is empty' check
for var in GXCOMPUTER GXCLUSTER GXSTANDARDQ GXABSMEMORY GXMEMORY GXNCPUS GXNLCPUS GXNPCPUS GXUSER GXCOPYQ GXCOPYM GXLOG GXSCRIPT
do
if [[ -z ${!var} ]]
then
echo "${var} is currently not configured, please ensure it was a valid value"
return 1
fi
done
# Check that the following values that point to a path actually exist. These are ones that (reasonably) should not
# automatically be created
for var in GXBASE GXSCRATCH GXHOME
do
if [[ ! -d ${!var} ]]
then
echo "The ${var} configurable has the path ${!var}, which appears to not exist. Please ensure it is a valid path."
return 1
fi
done
# Creates directories as needed below for the mandatory paths if they do not exist
if [[ ! -d "${GXLOG}" ]]
then
mkdir -p "${GXLOG}"
fi
if [[ ! -d "${GXSCRIPT}" ]]
then
mkdir -p "${GXSCRIPT}"
fi
# Create the staging area if it has been configured, otherwise skip
if [[ ! -z "${GXSTAGE}" ]]
then
if [[ ! -d "${GXSTAGE}" ]]
then
mkdir -p "${GXSTAGE}"
fi
fi
# Go forth and download the data dependencies
if [[ -d ${GXBASE} ]]
then
if [[ ! -d ${GXMWAPB} ]]
then
echo "Creating ${GXMWAPB} and caching FEE hdf5 file"
mkdir -p ${GXMWAPB} \
&& wget -P ${GXMWAPB} http://cerberus.mwa128t.org/mwa_full_embedded_element_pattern.h5
fi
if [[ ! -d ${GXMWALOOKUP} ]]
then
echo "Creating ${GXMWALOOKUP} and caching hdf5 lookup files"
mkdir -p ${GXMWALOOKUP} \
&& wget -O ${GXMWALOOKUP}/pb_lookup.tar.gz https://cloudstor.aarnet.edu.au/plus/s/77FRhCpXFqiTq1H/download \
&& tar -xzvf ${GXMWALOOKUP}/pb_lookup.tar.gz -C ${GXMWALOOKUP} \
&& rm ${GXMWALOOKUP}/pb_lookup.tar.gz
fi
fi