-
Notifications
You must be signed in to change notification settings - Fork 1
/
schednodes
executable file
·144 lines (128 loc) · 5.19 KB
/
schednodes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash
# Strict mode: a pipeline fails if any of its stages fails (pipefail), the
# script exits on an unhandled command failure (errexit), and expanding an
# unset variable is an error (nounset).
set -o pipefail \
-o errexit \
-o nounset
# Directory holding the schedule files. SGE_ROOT must be set in the
# environment: with nounset active the script aborts here otherwise.
schedule_dir="$SGE_ROOT/default/common"
# Example schedule-file record:
# 5038786:120:STARTING:1523502129:44100:H:node-u05a-014:slots:4.000000
# Its colon-separated fields, in order:
# job id:task_id:reason:start time:end time:level:object:resource:utilization
# Prints the command-line help text to stdout.
# Arguments: none
# Outputs:   the usage message, followed by one blank line
function usage() {
  # ${0##*/} strips every leading directory component, so the synopsis shows
  # the bare script name however the script was invoked.
  cat <<EOF
Usage: ${0##*/} [--all|--schedule-file=FILE|--daysback=NUM] [job id [task id]]
Parses schedule files and prints the nodes jobs were scheduled to start on.
By default, parses only the current schedule file: if this is rotated,
it will only show a limited selection of entries.
Note that the file arguments will automatically use the appropriate
decompression tool for files ending with .gz or .xz.
Options:
  -h,--help              Print this message.
  --schedule-file=FILE   Parse FILE instead of the current schedule file.
  --all                  Parse current schedule file and all files named
                         \$SGE_ROOT/default/common/schedule-????????.gz
                         or \$SGE_ROOT/default/common/schedule-????????.*.xz
  --daysback=NUM         Parse schedule files named as being from NUM+1 days
                         or more recent.

EOF
}
# Writes the contents of the current schedule file to stdout, optionally
# filtered down to one job (and one task), decompressing transparently.
# Globals:
#   schedule_file (read) - path of the file to read; set by the caller
#                          rather than passed as an argument (FIXME)
# Arguments:
#   $1 - optional job id (digits only); when empty the whole file is printed
#   $2 - optional task id (digits only); only used when $1 is given
# Outputs:
#   the selected records plus every "::::::::" cycle-separator line
# Returns:
#   0 on success, including when no line matched; the tool's own status when
#   the cat/grep tool reports a real error (status > 1 for grep).
#   Exits 2 on an invalid job id and 3 on an invalid task id.
function get_sched_lines () {
  local job_id="${1:-}"
  local task_id="${2:-}"
  local cat_tool grep_tool
  local status=0

  # Select tools to transparently handle compressed files. The patterns
  # require a real ".gz"/".xz" suffix, so a file literally named "gz" or
  # "xz" is read as plain text.
  case "$schedule_file" in
    *.gz)
      cat_tool="zcat"
      grep_tool="zgrep"
      ;;
    *.xz)
      cat_tool="xzcat"
      grep_tool="xzgrep"
      ;;
    *)
      cat_tool="cat"
      grep_tool="grep"
      ;;
  esac

  # Without a job id there is nothing to filter: just decompress.
  if [[ -z "$job_id" ]]; then
    "$cat_tool" "$schedule_file"
    return
  fi

  if [[ ! "$job_id" =~ ^[0-9]+$ ]]; then
    echo "Error: invalid job id: must be numeric only" >&2
    exit 2
  fi
  if [[ -n "$task_id" && ! "$task_id" =~ ^[0-9]+$ ]]; then
    echo "Error: invalid task id: must be numeric only" >&2
    exit 3
  fi

  # The separator pattern is kept in the output so that the consumer can
  # still tell scheduling cycles apart. ${task_id:+...} appends "<task>:" to
  # the anchor only when a task id was supplied.
  "$grep_tool" -e "^${job_id}:${task_id:+${task_id}:}" -e :::::::: "$schedule_file" \
    || status=$?

  # grep exits 1 when no line was selected. That is a valid, empty result and
  # must not abort the caller's pipeline; only real errors are propagated.
  if (( status > 1 )); then
    return "$status"
  fi
  return 0
}
# Reads schedule-log records on stdin and prints, for each scheduling cycle,
# one line per job task that started in it, e.g.
# [Tue Sep 24 22:37:45 BST 2024] 1730421.1: node-j00a-002.myriad.ucl.ac.uk node-j00a-003.myriad.ucl.ac.uk
# A hostname is appended once per matching record, so the same host can be
# listed several times for some types of job.
function concat_nodes() {
  awk -F: '
    # Print everything collected for the cycle that just ended, then forget it.
    # Each key of started has the form "<start time> <job id>.<task id>".
    function flush_cycle(    label, parts) {
      for (label in jobs) {
        split(label, parts, " ")
        print "[" strftime("%a %b %e %H:%M:%S %Z %Y", parts[1]) "] " parts[2] ":" jobs[label]
      }
      delete jobs
    }

    # A host-level slots record of a starting job: append the host name to
    # the string kept for that start time, job id and task id.
    # See: `man sge_schedule` for more info on log format
    $3 == "STARTING" && $6 == "H" && $8 == "slots" {
      entry = $4 " " $1 "." $2
      jobs[entry] = jobs[entry] " " $7
    }

    # A separator line opens a new scheduling cycle, so report the last one.
    $0 == "::::::::" {
      flush_cycle()
    }

    # Report whatever the final cycle collected.
    END {
      flush_cycle()
    }
  '
}
# Parses the command line, selects the schedule files to read and prints the
# start nodes found in each of them.
# Globals:
#   schedule_dir (read) - directory holding the schedule files
# Arguments:
#   $1        - optional selector: --schedule-file=FILE, --all,
#               --daysback=NUM, -h or --help; anything else is taken to be
#               the job id and the current schedule file is used
#   remaining - optional job id, then optional task id
# Outputs:
#   the output of get_sched_lines piped through concat_nodes, per file
# Exit status:
#   4 if no schedule file was found or one of them cannot be read,
#   5 if NUM is not a non-negative integer.
function main() {
  local first_arg="${1:-}"
  # Deliberately visible to get_sched_lines, which reads the name of the
  # current file from this variable (bash locals are dynamically scoped).
  local schedule_file
  local -a schedule_files=()
  local candidate num_days days dated_file_stem

  case "$first_arg" in
    --schedule-file=*)
      schedule_files=("${first_arg#--schedule-file=}")
      shift
      ;;
    --all)
      # Current file first, then the rotated archives in glob (name) order.
      # Globbing instead of parsing ls output keeps names containing
      # whitespace intact, and a pattern without any match is skipped
      # rather than treated as a fatal error.
      schedule_files=("$schedule_dir/schedule")
      for candidate in \
        "$schedule_dir"/schedule-????????.gz \
        "$schedule_dir"/schedule-????????.*.xz; do
        if [[ -e "$candidate" ]]; then
          schedule_files+=("$candidate")
        fi
      done
      shift
      ;;
    --daysback=*)
      num_days="${first_arg#--daysback=}"
      # The value ends up in an arithmetic context, so insist on digits.
      if [[ ! "$num_days" =~ ^[0-9]+$ ]]; then
        echo "Error: invalid number of days: must be numeric only" >&2
        exit 5
      fi
      # 10# forces base 10 so that e.g. "08" is not rejected as bad octal.
      for (( days = 0; days <= 10#$num_days; days++ )); do
        dated_file_stem="$schedule_dir/schedule-$(date --date="$days days ago" +%Y%m%d)"
        # Days without any file are skipped (best effort).
        for candidate in "$dated_file_stem"*; do
          if [[ -e "$candidate" ]]; then
            schedule_files+=("$candidate")
          fi
        done
      done
      shift
      ;;
    -h|--help)
      usage
      exit
      ;;
    *)
      schedule_files=("$schedule_dir/schedule")
      ;;
  esac

  # Also avoids expanding an empty array, which older bash versions reject
  # when nounset is active.
  if (( ${#schedule_files[@]} == 0 )); then
    echo "Error: no schedule files found" >&2
    exit 4
  fi

  for schedule_file in "${schedule_files[@]}"; do
    if [[ ! -r "$schedule_file" ]]; then
      echo "Error: could not read schedule file: $schedule_file" >&2
      exit 4
    fi
    get_sched_lines "${1:-}" "${2:-}" | concat_nodes
  done
}
# Entry point: hand every command-line argument to main unchanged.
main "$@"