From 7399adb5d2a6c6c246c54fca8c87885bd03c9654 Mon Sep 17 00:00:00 2001
From: Antti Kervinen
Date: Fri, 13 Sep 2024 19:13:07 +0300
Subject: [PATCH] e2e: add fuzz test sources for hbm/dram/pmem pods

Adds fuzz test generator script, model and runner for topology-aware
policy reliability tests on HBM+DRAM+PMEM platform.

Signed-off-by: Antti Kervinen
---
 .../test02-fuzz-memallocs/code.var.sh         |  57 +++++++
 .../test02-fuzz-memallocs/codelib.sh          |  13 ++
 .../n6-hbm-cxl/test02-fuzz-memallocs/fuzz.aal | 154 ++++++++++++++++++
 .../test02-fuzz-memallocs/fuzz.fmbt.conf      |   6 +
 .../test02-fuzz-memallocs/generate.sh         |  68 ++++++++
 5 files changed, 298 insertions(+)
 create mode 100644 test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/code.var.sh
 create mode 100644 test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/codelib.sh
 create mode 100644 test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.aal
 create mode 100644 test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.fmbt.conf
 create mode 100755 test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/generate.sh

diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/code.var.sh b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/code.var.sh
new file mode 100644
index 000000000..5e0316766
--- /dev/null
+++ b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/code.var.sh
@@ -0,0 +1,57 @@
+helm-terminate
+helm_config=$(instantiate helm-config.yaml) helm-launch topology-aware
+
+source "$TEST_DIR/codelib.sh" || {
+    echo "error importing codelib.sh"
+    exit 1
+}
+
+# Delete test pods from the kube-system and default namespaces. Pod
+# names carry the t<TESTID> prefix added by generate.sh; TESTID may
+# have several digits. Errors from the delete commands are ignored.
+cleanup-test-pods() {
+    ( vm-command "kubectl delete pods -n kube-system \$(kubectl get pods -n kube-system | awk '/t[0-9]+r[gb][ue]/{print \$1}')" ) || true
+    ( vm-command "kubectl delete pods -n default \$(kubectl get pods -n default | awk '/t[0-9]+[rgb][ue][0-9]/{print \$1}')" ) || true
+}
+cleanup-test-pods
+
+# Run generated*.sh test scripts in this directory.
+# Each script is sourced in its own background subshell with private
+# output directories; its output is prefixed with the script name.
+genscriptcount=0
+for genscript in "$TEST_DIR"/generated*.sh; do
+    if [ ! -f "$genscript" ]; then
+        continue
+    fi
+    (
+        paralleloutdir="$outdir/parallel$genscriptcount"
+        [ -d "$paralleloutdir" ] && rm -rf "$paralleloutdir"
+        mkdir "$paralleloutdir"
+        OUTPUT_DIR="$paralleloutdir"
+        COMMAND_OUTPUT_DIR="$paralleloutdir/commands"
+        mkdir "$COMMAND_OUTPUT_DIR"
+        source "$genscript" 2>&1 | sed -u -e "s/^/$(basename "$genscript"): /g"
+    ) &
+    genscriptcount=$(( genscriptcount + 1))
+done
+
+if [[ "$genscriptcount" == "0" ]]; then
+    echo "WARNING:"
+    echo "WARNING: Skipping fuzz tests:"
+    echo "WARNING: - Generated tests not found."
+    echo "WARNING: - Generate a test by running:"
+    echo "WARNING: $TEST_DIR/generate.sh"
+    echo "WARNING: - See test generation options:"
+    echo "WARNING: $TEST_DIR/generate.sh --help"
+    echo "WARNING:"
+    sleep 5
+    exit 0
+fi
+
+echo "waiting for $genscriptcount generated tests to finish..."
+wait
+
+cleanup-test-pods
+
+# Restore default test configuration, restart nri-resource-policy.
+helm-terminate
diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/codelib.sh b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/codelib.sh
new file mode 100644
index 000000000..f897c4b6a
--- /dev/null
+++ b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/codelib.sh
@@ -0,0 +1,13 @@
+container-exit0() {
+    # Terminate a container by killing the "sleep inf" child process in
+    # echo CONTNAME $(sleep inf)
+    local contname="$1"
+    vm-command "contpid=\$(ps axf | grep -A1 'echo $contname' | grep -v grep | tail -n 1 | awk '{print \$1}'); ( set -x; kill -KILL \$contpid; )"
+}
+
+container-signal() {
+    # Send SIGNAL to processes whose command line matches "echo CONTNAME"
+    local contname="$1"
+    local signal="$2"
+    vm-command "pkill -$signal -f 'echo $contname'"
+}
diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.aal b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.aal
new file mode 100644
index 000000000..b3bbb6eeb
--- /dev/null
+++ b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.aal
@@ -0,0 +1,154 @@
+language python {
+    max_mem=13500 # maximum memory on VM in MB ## NOTE: generate.sh will overwrite this
+    max_cpu=7000 # maximum CPUs on node in mCPU ## NOTE: generate.sh will overwrite this
+    max_reserved_cpu=1000 # maximum reserved CPUs on node in mCPU ## NOTE: generate.sh will overwrite this
+    class Vars:
+        # namespace for variables in input names
+        def __repr__(self):
+            return "{" + ",".join("%s:%s" % (a, getattr(self, a)) for a in sorted(self.__dict__.keys()) if not a.startswith("_")) + "}\n"
+    def inputvars(input_name):
+        # parse VAR=VALUE's from input_name
+        v = Vars()
+        for word in input_name.split():
+            keyvalue = word.split("=")
+            if len(keyvalue) == 2:
+                # drop a unit suffix that follows a digit: 200m -> 200, 1500M -> 1500
+                if (keyvalue[1].endswith("m") or keyvalue[1].endswith("M")) and len(keyvalue[1]) > 1 and keyvalue[1][-2] in '0123456789':
+                    keyvalue[1] = keyvalue[1][:-1]
+                try:
+                    setattr(v, keyvalue[0], int(keyvalue[1]))
+                except ValueError:
+                    setattr(v, keyvalue[0], keyvalue[1])
+        return v
+}
+
+variables {
+    mem, cpu, reserved_cpu, pods
+}
+
+initial_state {
+    mem=0
+    cpu=0
+    reserved_cpu=0
+    pods={}
+}
+
+# Create non-reserved CPU pods
+input
+    "NAME=gu0 CONTCOUNT=1 CPU=200m MEM=1500M create guaranteed",
+    "NAME=gu1 CONTCOUNT=2 CPU=1000m MEM=500M create guaranteed",
+    "NAME=gu1hbm CONTCOUNT=2 CPU=1000m MEM=500M MEMTYPE=hbm create guaranteed",
+    "NAME=gu2 CONTCOUNT=2 CPU=1200m MEM=4500M create guaranteed",
+    "NAME=gu2pmem CONTCOUNT=2 CPU=1200m MEM=4500M MEMTYPE=pmem create guaranteed",
+    "NAME=gu3 CONTCOUNT=3 CPU=2000m MEM=500M create guaranteed",
+    "NAME=gu3dram CONTCOUNT=3 CPU=2000m MEM=500M MEMTYPE=dram create guaranteed",
+    "NAME=gu4 CONTCOUNT=1 CPU=4200m MEM=100M create guaranteed",
+    "NAME=bu0 CONTCOUNT=1 CPU=1200m MEM=50M CPUREQ=900m MEMREQ=49M CPULIM=1200m MEMLIM=50M create burstable",
+    "NAME=bu0hbmpmem CONTCOUNT=1 CPU=1200m MEM=50M CPUREQ=900m MEMREQ=49M CPULIM=1200m MEMLIM=50M MEMTYPE=hbm,pmem create burstable",
+    "NAME=bu1 CONTCOUNT=2 CPU=1900m MEM=300M CPUREQ=1800m MEMREQ=299M CPULIM=1900m MEMLIM=300M create burstable",
+    "NAME=bu1hbmdram CONTCOUNT=2 CPU=1900m MEM=300M CPUREQ=1800m MEMREQ=299M CPULIM=1900m MEMLIM=300M MEMTYPE=hbm,dram create burstable",
+    "NAME=be0 CONTCOUNT=1 CPU=0 MEM=0 create besteffort",
+    "NAME=be1 CONTCOUNT=3 CPU=0 MEM=0 create besteffort"
+{
+    guard {
+        v = inputvars(input_name)
+        return (v.NAME not in pods
+                and (mem + v.MEM * v.CONTCOUNT < max_mem)
+                and (cpu + v.CPU * v.CONTCOUNT < max_cpu))
+    }
+    body {
+        v = inputvars(input_name)
+        v.namespace = getattr(v, "namespace", "default")
+        mem += v.MEM * v.CONTCOUNT
+        cpu += v.CPU * v.CONTCOUNT
+        pods[v.NAME] = v
+    }
+}
+
+# Create pods to the kube-system namespace
+input
+    "NAME=rgu0 CONTCOUNT=2 CPU=100m MEM=1000M namespace=kube-system create guaranteed",
+    "NAME=rgu0pmem CONTCOUNT=2 CPU=100m MEM=1000M namespace=kube-system MEMTYPE=pmem create guaranteed",
+    "NAME=rbu0 CONTCOUNT=1 CPU=100m MEM=100M CPUREQ=99m MEMREQ=99M CPULIM=100m MEMLIM=100M namespace=kube-system create burstable",
+    "NAME=rbe0 CONTCOUNT=2 CPU=0 MEM=0 namespace=kube-system create besteffort"
+{
+    guard {
+        v = inputvars(input_name)
+        return (v.NAME not in pods
+                and (mem + v.MEM * v.CONTCOUNT < max_mem)
+                and (reserved_cpu + v.CPU * v.CONTCOUNT < max_reserved_cpu))
+
+    }
+    body {
+        v = inputvars(input_name)
+        mem += v.MEM * v.CONTCOUNT
+        reserved_cpu += v.CPU * v.CONTCOUNT
+        pods[v.NAME] = v
+    }
+}
+
+# Kill a process in a container
+# - "echo gu0c1" matches and kills process only in container gu0c1 in pod gu0
+# - "echo gu0" matches and kills processes in all containers of pod gu0
+input
+    "NAME=gu0 container-exit0 gu0c0",
+    "NAME=gu1 container-exit0 gu1c0",
+    "NAME=gu1hbm container-exit0 gu1hbmc0",
+    "NAME=gu2 container-exit0 gu2c0",
+    "NAME=gu2pmem container-exit0 gu2pmemc0",
+    "NAME=gu3 container-exit0 gu3",
+    "NAME=gu3dram container-exit0 gu3dramc0",
+    "NAME=gu4 container-exit0 gu4c",
+    "NAME=bu0 container-exit0 bu0c0",
+    "NAME=bu0hbmpmem container-exit0 bu0hbmpmemc0",
+    "NAME=bu1 container-exit0 bu1c0",
+    "NAME=bu1hbmdram container-exit0 bu1hbmdramc0",
+    "NAME=be0 container-exit0 be0c0",
+    "NAME=be1 container-exit0 be1c0",
+    "NAME=rgu0 container-exit0 rgu0c0",
+    "NAME=rgu0pmem container-exit0 rgu0pmemc0",
+    "NAME=rbu0 container-exit0 rbu0c0",
+    "NAME=rbe0 container-exit0 rbe0c0"
+{
+    guard {
+        v = inputvars(input_name)
+        return v.NAME in pods
+    }
+}
+
+# Delete single pod
+input
+    "NAME=gu0 vm-command 'kubectl delete pod gu0 --now'",
+    "NAME=gu1 vm-command 'kubectl delete pod gu1 --now'",
+    "NAME=gu1hbm vm-command 'kubectl delete pod gu1hbm --now'",
+    "NAME=gu2 vm-command 'kubectl delete pod gu2 --now'",
+    "NAME=gu2pmem vm-command 'kubectl delete pod gu2pmem --now'",
+    "NAME=gu3 vm-command 'kubectl delete pod gu3 --now'",
+    "NAME=gu3dram vm-command 'kubectl delete pod gu3dram --now'",
+    "NAME=gu4 vm-command 'kubectl delete pod gu4 --now'",
+    "NAME=bu0 vm-command 'kubectl delete pod bu0 --now'",
+    "NAME=bu0hbmpmem vm-command 'kubectl delete pod bu0hbmpmem --now'",
+    "NAME=bu1 vm-command 'kubectl delete pod bu1 --now'",
+    "NAME=bu1hbmdram vm-command 'kubectl delete pod bu1hbmdram --now'",
+    "NAME=be0 vm-command 'kubectl delete pod be0 --now'",
+    "NAME=be1 vm-command 'kubectl delete pod be1 --now'",
+    "NAME=rgu0 vm-command 'kubectl delete pod rgu0 -n kube-system --now'",
+    "NAME=rgu0pmem vm-command 'kubectl delete pod rgu0pmem -n kube-system --now'",
+    "NAME=rbu0 vm-command 'kubectl delete pod rbu0 -n kube-system --now'",
+    "NAME=rbe0 vm-command 'kubectl delete pod rbe0 -n kube-system --now'"
+{
+    guard {
+        v = inputvars(input_name)
+        return v.NAME in pods
+    }
+    body {
+        v = inputvars(input_name)
+        p = pods[v.NAME]
+        mem -= p.MEM * p.CONTCOUNT
+        if getattr(p, "namespace", "") == "kube-system":
+            reserved_cpu -= p.CPU * p.CONTCOUNT
+        else:
+            cpu -= p.CPU * p.CONTCOUNT
+        del pods[v.NAME]
+    }
+}
diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.fmbt.conf b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.fmbt.conf
new file mode 100644
index 000000000..f1c6d6900
--- /dev/null
+++ b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.fmbt.conf
@@ -0,0 +1,6 @@
+model = aal_remote(remote_pyaal --verbose-fmbt-log fuzz.aal)
+heuristic = mrandom(80,lookahead(1:2),20,random)
+coverage = perm(2)
+
+pass = coverage(10)
+pass = steps(100)
diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/generate.sh b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/generate.sh
new file mode 100755
index 000000000..bcbae8270
--- /dev/null
+++ b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/generate.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+usage() {
+    cat <<EOF
+Usage: [VAR=VALUE...] generate.sh
+
+Configuration variables:
+  TESTCOUNT=<NUM>       Number of generated test scripts that run in parallel.
+  MEM=<NUM>             Memory [MB] available for test pods in the system.
+  CPU=<NUM>             Non-reserved CPU [mCPU] available for test pods in the system.
+  RESERVED_CPU=<NUM>    Reserved CPU [mCPU] available for test pods in the system.
+  STEPS=<NUM>           Total number of test steps in all parallel tests.
+
+  FMBT_IMAGE=<IMG:TAG>  Generate the test using fmbt from docker image IMG:TAG.
+                        The default is fmbt-cli:latest.
+EOF
+    exit 0
+}
+
+if [ -n "$1" ]; then
+    usage
+fi
+
+TESTCOUNT=${TESTCOUNT:-1}
+MEM=${MEM:-11000}
+# 950 mCPU taken by the control plane, split the remaining 15050 mCPU
+# available for test pods to CPU and RESERVED_CPU pods.
+CPU=${CPU:-7000}
+RESERVED_CPU=${RESERVED_CPU:-1000}
+STEPS=${STEPS:-100}
+FMBT_IMAGE=${FMBT_IMAGE:-"fmbt-cli:latest"}
+
+mem_per_test=$(( MEM / TESTCOUNT ))
+cpu_per_test=$(( CPU / TESTCOUNT ))
+reserved_cpu_per_test=$(( RESERVED_CPU / TESTCOUNT ))
+steps_per_test=$(( STEPS / TESTCOUNT ))
+
+# Check fmbt Docker image
+docker run --rm "$FMBT_IMAGE" fmbt --version 2>&1 | grep ^Version: || {
+    echo "error: cannot run fmbt from Docker image '$FMBT_IMAGE'"
+    echo "You can build the image locally by running:"
+    echo "( cd /tmp && git clone --branch devel https://github.com/intel/fmbt && cd fmbt && docker build . -t $FMBT_IMAGE -f Dockerfile.fmbt-cli )"
+    exit 1
+}
+
+cd "$(dirname "$0")" || {
+    echo "cannot cd to the directory of $0"
+    exit 1
+}
+
+for testnum in $(seq 1 "$TESTCOUNT"); do
+    testid=$(( testnum - 1))
+    sed -e "s/max_mem=.*/max_mem=${mem_per_test}/" \
+        -e "s/max_cpu=.*/max_cpu=${cpu_per_test}/" \
+        -e "s/max_reserved_cpu=.*/max_reserved_cpu=${reserved_cpu_per_test}/" \
+        < fuzz.aal > tmp.fuzz.aal
+    sed -e "s/fuzz\.aal/tmp.fuzz.aal/" \
+        -e "s/pass = steps(.*/pass = steps(${steps_per_test})/" \
+        < fuzz.fmbt.conf > tmp.fuzz.fmbt.conf
+    OUTFILE=generated${testid}.sh
+    echo "generating $OUTFILE..."
+    # Run fmbt in the container and convert its log into a shell script:
+    # - lines that do not start with STEP become 'echo "TESTGEN: ..."',
+    # - "STEP<n>i:<action>" lines run <action> between two timestamps
+    #   and list all pods afterwards,
+    # - pod/container names (gu0, rbu0c0, ...) get a t<testid> prefix.
+    docker run --rm -v "$(pwd):/mnt/models" "$FMBT_IMAGE" sh -c 'cd /mnt/models; fmbt tmp.fuzz.fmbt.conf 2>/dev/null | fmbt-log -f STEP\$sn\$as\$al' | grep -v AAL | sed -e 's/^, / /g' -e '/^STEP/! s/\(^.*\)/echo "TESTGEN: \1"/g' -e 's/^STEP\([0-9]*\)i:\(.*\)/echo "TESTGEN: STEP \1"; vm-command "date +%T.%N"; \2; vm-command "date +%T.%N; kubectl get pods -A"/g' | sed "s/\([^a-z0-9]\)\(r\?\)\(gu\|bu\|be\)\([0-9]\)/\1t${testid}\2\3\4/g" > "$OUTFILE"
+done