Skip to content

Commit

Permalink
feat(validator): add support to validate kepler metrics
Browse files Browse the repository at this point in the history
This commit adds support to validate essential metrics produced by
Kepler

Signed-off-by: vprashar2929 <[email protected]>
  • Loading branch information
vprashar2929 committed Nov 8, 2024
1 parent 82dc44a commit 33d2963
Show file tree
Hide file tree
Showing 8 changed files with 495 additions and 4 deletions.
354 changes: 354 additions & 0 deletions e2e/tools/validator/metric_validations.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,354 @@
config:
mapping:
actual: latest
predicted: dev

validations:
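# node RAPL comparison: node_rapl_* counters (actual) vs. Kepler node-level
# dynamic-mode counters from the dev job (predicted)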
- name: node-rapl - kepler-package
units: Watts
mapping:
actual: node-rapl
predicted: kepler-package

node-rapl: |
sum(
rate(
node_rapl_package_joules_total[{rate_interval}]
)
)
kepler-package: |
sum(
rate(
kepler_node_package_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
)
max_mae: 1.00

- name: node-rapl - kepler-core
units: Watts
mapping:
actual: node-rapl
predicted: kepler-core

node-rapl: |
sum(
rate(
node_rapl_core_joules_total[{rate_interval}]
)
)
kepler-core: |
sum(
rate(
kepler_node_core_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
)
max_mae: 1.00

- name: node-rapl - kepler-dram
units: Watts
mapping:
actual: node-rapl
predicted: kepler-dram

node-rapl: |
sum(
rate(
node_rapl_dram_joules_total[{rate_interval}]
)
)
kepler-dram: |
sum(
rate(
kepler_node_dram_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
)
max_mae: 1.00

# absolute power comparison: total process power with no mode filter, latest vs. dev
- name: Total - absolute
latest: |
sum(
rate(
kepler_process_joules_total{{
job="latest",
}}[{rate_interval}]
)
)
dev: |
sum(
rate(
kepler_process_joules_total{{
job="dev",
}}[{rate_interval}]
)
)
max_mae: 0.59

# CPU time comparison
- name: cpu-time
units: Milliseconds
latest: |
sum(
rate(
kepler_process_bpf_cpu_time_ms_total{{
job="latest"
}}[{rate_interval}]
)
)
dev: |
sum(
rate(
kepler_process_bpf_cpu_time_ms_total{{
job="dev",
}}[{rate_interval}]
)
)
# NOTE: no max_mae threshold is set for cpu-time; the value below is commented out.
# max_mae: 20.0

# process comparison: per-process counters summed across all processes, latest vs. dev
- name: platform - dynamic
latest: |
sum(
rate(
kepler_process_platform_joules_total{{
job="latest", mode="dynamic",
}}[{rate_interval}]
)
)
dev: |
sum(
rate(
kepler_process_platform_joules_total{{
job="dev", mode="dynamic",
}}[{rate_interval}]
)
)
max_mae: 0.59

- name: package - dynamic
units: Watts
latest: |
sum(
rate(
kepler_process_package_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
)
dev: |
sum(
rate(
kepler_process_package_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
)
max_mae: 0.59

- name: core - dynamic
units: Watts
latest: |
sum(
rate(
kepler_process_core_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
)
dev: |
sum(
rate(
kepler_process_core_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
)
max_mae: 0.59

- name: dram - dynamic
units: Watts
latest: |
sum(
rate(
kepler_process_dram_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
)
dev: |
sum(
rate(
kepler_process_dram_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
)
max_mae: 0.59

- name: other - dynamic
units: Watts
latest: |
sum(
rate(
kepler_process_other_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
)
dev: |
sum(
rate(
kepler_process_other_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
)
max_mae: 0.59

- name: uncore - dynamic
units: Watts
latest: |
sum(
rate(
kepler_process_uncore_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
)
dev: |
sum(
rate(
kepler_process_uncore_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
)
max_mae: 0.59

# node comparison: node-level dynamic-mode counters, latest vs. dev
- name: node platform - dynamic
units: Watts
latest: |
rate(kepler_node_platform_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
dev: |
rate(kepler_node_platform_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
max_mae: 0.59

- name: node package - dynamic
units: Watts
latest: |
rate(kepler_node_package_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
dev: |
rate(kepler_node_package_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
max_mae: 0.59

- name: node core - dynamic
units: Watts
latest: |
rate(kepler_node_core_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
dev: |
rate(kepler_node_core_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
max_mae: 0.59

- name: node dram - dynamic
units: Watts
latest: |
rate(kepler_node_dram_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
dev: |
rate(kepler_node_dram_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
max_mae: 0.59

- name: node other - dynamic
units: Watts
latest: |
rate(kepler_node_other_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
dev: |
rate(kepler_node_other_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
max_mae: 0.59

- name: node uncore - dynamic
units: Watts
latest: |
rate(kepler_node_uncore_joules_total{{
job="latest",
mode="dynamic",
}}[{rate_interval}]
)
dev: |
rate(kepler_node_uncore_joules_total{{
job="dev",
mode="dynamic",
}}[{rate_interval}]
)
max_mae: 0.59
44 changes: 44 additions & 0 deletions e2e/tools/validator/scripts/regression-stressor.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Drives stress-ng through a fixed CPU load curve (see main below).

# -e: exit when a command fails; -u: treat unset variables as errors;
# pipefail: a pipeline fails if any of its stages fails.
set -eu -o pipefail

# SIGINT (Ctrl-C) handler: stop every child of this script, then exit.
trap exit_all INT

#######################################
# Terminates all direct children of this script and exits.
# Note: bash runs a trap only after the current foreground command has
# finished, so the children may already be gone when this is called.
# Arguments: none
# Returns:   never returns; exits with 130 (128 + SIGINT)
#######################################
exit_all() {
  # pkill exits 1 when no process matched; that is not an error here and
  # must not decide whether the script stops.
  pkill -P "$$" || true
  exit 130
}

#######################################
# Prints a command line, executes it, then prints an overline rule that
# closes off the command's output.
# Arguments: $@ - the command and its arguments
# Outputs:   the command line, the command's own output, the rule
#######################################
run() {
  echo "${*}"
  "$@"
  printf '%s\n' " ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾"
}

#######################################
# Runs stress-ng on every CPU reported by nproc: a short idle warmup
# followed by a fixed load curve.
# Arguments: none are read
# Outputs:   whatever run and stress-ng print
#######################################
main() {
  local worker_count
  worker_count=$(nproc)

  # Each step is "<cpu load>:<duration in seconds>".
  local -a load_curve=(
    0:5
    10:20
    25:20
    50:20
    75:20
    50:20
    25:20
    10:20
    0:5
  )

  # Arguments shared by every stress-ng invocation.
  local -a stress_cmd=(stress-ng --cpu "$worker_count" --cpu-method ackermann)

  # Five seconds at zero load before the curve starts; per the original
  # note, this is so that the first and second runs look the same.
  echo "Warmup .."
  run "${stress_cmd[@]}" --cpu-load 0 --timeout 5

  local step percent duration
  for step in "${load_curve[@]}"; do
    # Split "load:seconds" on the colon.
    IFS=: read -r percent duration <<<"$step"
    run "${stress_cmd[@]}" --cpu-load "$percent" --timeout "${duration}s"
  done
}

# Script entry point: pass the command-line arguments through to main.
main "$@"
Loading

0 comments on commit 33d2963

Please sign in to comment.