WIP: expose balloons and container allocations in nrt
Signed-off-by: Antti Kervinen <[email protected]>
askervin committed Jan 22, 2025
1 parent f09cb0e commit 69c0030
Showing 4 changed files with 304 additions and 2 deletions.
186 changes: 184 additions & 2 deletions cmd/plugins/balloons/policy/balloons-policy.go
@@ -29,10 +29,12 @@ import (
"github.com/containers/nri-plugins/pkg/resmgr/events"
libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"
policy "github.com/containers/nri-plugins/pkg/resmgr/policy"
policyapi "github.com/containers/nri-plugins/pkg/resmgr/policy"
"github.com/containers/nri-plugins/pkg/utils"
"github.com/containers/nri-plugins/pkg/utils/cpuset"
idset "github.com/intel/goresctrl/pkg/utils"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

@@ -73,6 +75,7 @@ type balloons struct {
allowed cpuset.CPUSet // bounding set of CPUs we're allowed to use
reserved cpuset.CPUSet // system-/kube-reserved CPUs
freeCpus cpuset.CPUSet // CPUs to be included in growing or new ballons
ifreeCpus cpuset.CPUSet // initially free CPUs before assigning any containers
cpuTree *cpuTreeNode // system CPU topology

reservedBalloonDef *BalloonDef // reserved balloon definition, pointer to bpoptions.BalloonDefs[x]
@@ -326,8 +329,186 @@ func (p *balloons) ExportResourceData(c cache.Container) map[string]string {
}

// GetTopologyZones returns the policy/pool data for 'topology zone' CRDs.
func (b *balloons) GetTopologyZones() []*policy.TopologyZone {
return nil
func (p *balloons) GetTopologyZones() []*policy.TopologyZone {
exposeAllocations := true // make this a balloons policy configuration/debug option
zones := []*policyapi.TopologyZone{}
sysmCpu := 1000 * p.cpuTree.cpus.Size()
for _, bln := range p.balloons {
// Expose every balloon as a separate zone.
zone := &policyapi.TopologyZone{
Name: bln.PrettyName(),
Type: "balloon",
}

cpu := &policyapi.ZoneResource{
Name: policyapi.CPUResource,
}

blnReqmCpu := p.requestedMilliCpus(bln)

// "Available" is the largest CPU request of a
// container that currently fits into the
// balloon. This takes into account containers already
// in the balloon, balloon's CPU limit (maxCPUs),
// policy's allowed CPUs and already allocated CPUs to
// other balloons (freeCpus) as the balloon may be
// inflated to fit the container.
cpu.Available = *resource.NewMilliQuantity(
int64(bln.MaxAvailMilliCpus(p.freeCpus) - blnReqmCpu),
resource.DecimalSI)

// "Allocatable" is the largest CPU request of a
// container that can be fit into the balloon, given
// that this or other balloons do not include any
// containers.
maxBlnSize := p.ifreeCpus.Size()
if bln.Def.MinBalloons > 0 {
// If this is a pre-created balloon, then
// ifreeCpus is missing CPUs pre-allocated for
// it.
maxBlnSize += bln.Def.MinCpus
}
if bln.Def.MaxCpus == NoLimit || bln.Def.MaxCpus > maxBlnSize {
cpu.Allocatable = *resource.NewMilliQuantity(
1000*int64(maxBlnSize),
resource.DecimalSI)
} else {
cpu.Allocatable = *resource.NewMilliQuantity(
1000*int64(bln.Def.MaxCpus),
resource.DecimalSI)
}

// "Capacity" is the total number of CPUs available in
// the system, including CPUs not allowed to be used
// by the policy.
cpu.Capacity = *resource.NewMilliQuantity(
int64(sysmCpu),
resource.DecimalSI)
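// Worked example with hypothetical numbers (and assuming
// MaxAvailMilliCpus(freeCpus) returns the balloon's largest
// possible size in mCPU): on an 8-CPU node, a balloon with
// maxCPUs: 4 that owns 2 CPUs and carries 1300m of requests,
// with 2 CPUs left in freeCpus, would report
// Capacity = 8000m, Available = 4000m - 1300m = 2700m, and
// Allocatable = 4000m (capped by maxCPUs).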
zone.Resources = append(zone.Resources, cpu)

attributes := []*policyapi.ZoneAttribute{
{
// "cpuset" are CPUs allowed only to
// containers in this balloon.
Name: policyapi.CPUsAttribute,
Value: bln.Cpus.String(),
},
{
// "shared cpuset" are CPUs allowed to
// containers in this and other
// balloons that shareIdleCPUsInSame
// scope.
Name: policyapi.SharedCPUsAttribute,
Value: bln.SharedIdleCpus.String(),
},
{
// "excess cpus" is the largest CPU
// request of a container that fits
// into this balloon without inflating
// it.
Name: policyapi.ExcessCPUsAttribute,
Value: fmt.Sprintf("%dm", bln.AvailMilliCpus() - blnReqmCpu),
},
}
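// Hypothetical example of the excess computation above
// (assuming AvailMilliCpus() returns the balloon's current
// size in mCPU): if a balloon owns 2 CPUs and its containers
// request 1300m in total, the zone exposes
// "excess cpus: 700m", i.e. room for bursting or for admitting
// a container of up to 700m without inflating the balloon.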
zone.Attributes = append(zone.Attributes, attributes...)

zones = append(zones, zone)

// TODO: Discuss and agree about exposing
// balloon-container relation in the zone tree, and
// CPUs and memories pinning on each container.
//
// A container assigned into a balloon is exposed as
// an "allocation for container" subzone whose parent
// is the balloon zone. The subzone has the following
// resources:
//
// "Capacity": container's resource usage
// limit. Regarding CPU capacity specifically, CPU
// usage of the container is limited by
// resources.limits.cpu and the number of allowed CPUs
// (balloon + shared). "Capacity" reflects the
// effective (the tighter) limit.
//
// "Allocatable": container resource request. This
// reflects the affect of the container in the balloon
// size. When the balloon has no "excess cpus", sum of
// "Allocatable" CPUs of its containers equals to
// balloon size.
//
// "Available": always 0. This prevents possible kube
// scheduler extensions from thinking they could
// schedule anything specifically onto these subzones.
//
// Most importantly, the attributes of the subzone
// include the cpuset and memory nodes allowed for the
// container. While the cpuset consists of the
// balloon's own and shared CPUs, the memory nodes
// cannot be deduced from anywhere else. They are
// container-specific due to balloon-type and
// annotation options that specify whether memory
// should be pinned at all, and to which memory types
// it should be pinned. Moreover, the set of allowed
// memory nodes may be expanded when memory requests
// do not fit on a limited number of nodes in the set.

// TODO: make expose-allocations-as-topology-zones a
// policy option (overridable in balloon-types),
// default to false. This will be the recommended way
// to observe how containers are assigned into
// balloons and what CPUs and memories each container
// is allowed to use.
if !exposeAllocations {
continue
}
for _, ctrIDs := range bln.PodIDs {
for _, ctrID := range ctrIDs {
c, ok := p.cch.LookupContainer(ctrID)
if !ok {
continue
}
czone := &policyapi.TopologyZone{
Name: c.PrettyName(),
Type: policyapi.ContainerAllocationZoneType,
}
ctrLimitmCpu := p.containerLimitedMilliCpus(ctrID)
ctrReqmCpu := p.containerRequestedMilliCpus(ctrID)
ctrCapacitymCpu := ctrLimitmCpu
ctrCpusetCpus := c.GetCpusetCpus()
ctrAllowedmCpu := sysmCpu
if ctrCpusetCpus != "" {
ctrAllowedmCpu = 1000 * cpuset.MustParse(ctrCpusetCpus).Size()
}
if ctrLimitmCpu == 0 || ctrLimitmCpu > ctrAllowedmCpu {
ctrCapacitymCpu = ctrAllowedmCpu
}
czone.Resources = []*policyapi.ZoneResource{
{
Name: policyapi.CPUResource,
Capacity: *resource.NewMilliQuantity(
int64(ctrCapacitymCpu),
resource.DecimalSI),
Allocatable: *resource.NewMilliQuantity(
int64(ctrReqmCpu),
resource.DecimalSI),
},
}
czone.Parent = zone.Name
czone.Attributes = []*policyapi.ZoneAttribute{
{
Name: policyapi.CPUsAttribute,
Value: ctrCpusetCpus,
},
{
Name: policyapi.MemsetAttribute,
Value: c.GetCpusetMems(),
},
}
zones = append(zones, czone)
}
}
}
return zones
}
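
To make the exported shape concrete, below is a self-contained sketch of the zones this function might emit for one balloon and one container. It is not part of this change: the local types merely mirror the policyapi fields used above, and every name and value (node size, balloon cpuset, requests) is hypothetical.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// Local stand-ins for the policyapi types used above, trimmed to
// the fields this sketch needs.
type ZoneResource struct {
	Name                             string
	Capacity, Allocatable, Available resource.Quantity
}

type ZoneAttribute struct {
	Name, Value string
}

type TopologyZone struct {
	Name, Type, Parent string
	Resources          []*ZoneResource
	Attributes         []*ZoneAttribute
}

func main() {
	// A balloon zone on an 8-CPU node: the balloon owns CPUs
	// 4-5, has maxCPUs: 4, shares idle CPUs 6-7, and holds one
	// container requesting 750m with no CPU limit. Available
	// assumes enough freeCpus to inflate to 4 CPUs.
	balloon := &TopologyZone{
		Name: "fullsocket[0]",
		Type: "balloon",
		Resources: []*ZoneResource{{
			Name:        "cpu",
			Capacity:    *resource.NewMilliQuantity(8000, resource.DecimalSI),
			Allocatable: *resource.NewMilliQuantity(4000, resource.DecimalSI),
			Available:   *resource.NewMilliQuantity(3250, resource.DecimalSI),
		}},
		Attributes: []*ZoneAttribute{
			{Name: "cpuset", Value: "4-5"},
			{Name: "shared cpuset", Value: "6-7"},
			{Name: "excess cpus", Value: "1250m"},
		},
	}
	// The container subzone: Capacity is the allowed cpuset
	// (balloon + shared = 4 CPUs) because no CPU limit is set,
	// Allocatable is the request, and Available is always 0.
	ctr := &TopologyZone{
		Name:   "default/pod0/pod0c0",
		Type:   "allocation for container",
		Parent: balloon.Name,
		Resources: []*ZoneResource{{
			Name:        "cpu",
			Capacity:    *resource.NewMilliQuantity(4000, resource.DecimalSI),
			Allocatable: *resource.NewMilliQuantity(750, resource.DecimalSI),
			Available:   *resource.NewMilliQuantity(0, resource.DecimalSI),
		}},
		Attributes: []*ZoneAttribute{
			{Name: "cpuset", Value: "4-7"},
			{Name: "memory set", Value: "0"},
		},
	}
	fmt.Println(balloon.Name, "->", ctr.Name)
}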

// balloonByContainer returns a balloon that contains a container.
@@ -1121,6 +1302,7 @@ func (p *balloons) setConfig(bpoptions *BalloonsOptions) error {
}
}
}
p.ifreeCpus = p.freeCpus.Clone()

// Finish balloon instance initialization.
log.Info("%s policy balloons:", PolicyName)
10 changes: 10 additions & 0 deletions pkg/resmgr/policy/policy.go
@@ -165,12 +165,22 @@ const (
CPUResource = "cpu"
// MemsetAttribute is the attribute name for assignable memory set
MemsetAttribute = "memory set"
// CPUsAttribute is the attribute name for the assignable CPU set
CPUsAttribute = "cpuset"
// SharedCPUsAttribute is the attribute name for the assignable shared CPU set
SharedCPUsAttribute = "shared cpuset"
// ReservedCPUsAttribute is the attribute name for the assignable reserved CPU set
ReservedCPUsAttribute = "reserved cpuset"
// IsolatedCPUsAttribute is the attribute name for the assignable isolated CPU set
IsolatedCPUsAttribute = "isolated cpuset"
// ExcessCPUsAttribute is the attribute name for CPUs that
// have been allocated but not requested. For instance, if
// containers in a balloon request 1300 mCPU in total, at
// least 2 CPUs must be allocated to the balloon. This
// leaves an excess of 700 mCPU available for bursting.
ExcessCPUsAttribute = "excess cpus"
// ContainerAllocationZoneType is the zone type for subzones that export containers as topology subzones
ContainerAllocationZoneType = "allocation for container"
)

// TopologyZone provides policy-/pool-specific data for 'node resource topology' CRs.
@@ -0,0 +1,29 @@
config:
pinCPU: true
pinMemory: true
allocatorTopologyBalancing: true
reservedResources:
cpu: cpuset:3
agent:
nodeResourceTopology: true
log:
debug:
- resource-manager
- policy
- cache
source: true
klog:
skip_headers: true
instrumentation:
reportPeriod: 60s
samplingRatePerMillion: 1000000
balloonTypes:
- name: fullsocket
pinMemory: false
maxCPUs: 4
minCPUs: 1
minBalloons: 2
maxBalloons: 2
shareIdleCPUsInSame: package
namespaces:
- "*"
@@ -0,0 +1,81 @@
cleanup() {
vm-command "kubectl delete pods --all --now"
helm-terminate
}

cleanup
helm_config=${TEST_DIR}/balloons-nrt.cfg helm-launch balloons

export get_nrt="kubectl get noderesourcetopologies.topology.node.k8s.io \$(hostname)"

verify-zone-attribute() {
local zone_name=$1
local attribute_name=$2
local expected_value_re=$3
echo ""
echo "### Verifying topology zone $zone_name attribute $attribute_name value matches $expected_value_re"
vm-command "$get_nrt -o json | jq -r '.zones[] | select (.name == \"$zone_name\").attributes[] | select(.name == \"$attribute_name\").value'"
[[ "$COMMAND_OUTPUT" =~ $expected_value_re ]] ||
command-error "expected zone $zone_name attribute $attribute_name value $expected_value, got: $COMMAND_OUTPUT"
}

verify-zone-resource() {
local zone_name=$1
local resource_name=$2
local resource_field=$3
local expected_value=$4
echo ""
echo "### Verifying topology zone $zone_name resource $resouce_name field $resource_field equals $expected_value"
vm-command "$get_nrt -o json | jq -r '.zones[] | select (.name == \"$zone_name\").resources[] | select(.name == \"$resource_name\").$resource_field'"
[[ "$COMMAND_OUTPUT" == "$expected_value" ]] ||
command-error "expected zone $zone_name resource $resource_name.$resource_field $expected_value, got: $COMMAND_OUTPUT"
}

# Print full NRT yaml for debugging
vm-command "$get_nrt -o yaml"

# Verify zones when fullsocket balloons do not include containers.
verify-zone-attribute "fullsocket[0]" "cpuset" "[4-7]"
verify-zone-attribute "fullsocket[0]" "shared cpuset" "5-7|4,6-7|4-5,7|4-6"
verify-zone-attribute "fullsocket[0]" "excess cpus" "1"
verify-zone-attribute "fullsocket[1]" "cpuset" "0|1"
verify-zone-attribute "fullsocket[1]" "shared cpuset" "1-2|0,2"
verify-zone-attribute "fullsocket[1]" "excess cpus" "1"
verify-zone-attribute "reserved[0]" "cpuset" "3"
verify-zone-attribute "reserved[0]" "shared cpuset" "^\$"

verify-zone-resource "reserved[0]" "cpu" "capacity" "8"
verify-zone-resource "reserved[0]" "cpu" "allocatable" "6"
verify-zone-resource "fullsocket[0]" "cpu" "allocatable" "4"
verify-zone-resource "fullsocket[0]" "cpu" "available" "4"
verify-zone-resource "fullsocket[1]" "cpu" "allocatable" "4"
verify-zone-resource "fullsocket[1]" "cpu" "available" "4"

# Create burstable containers without CPU limits
CPUREQ="750m" MEMREQ="100M" CPULIM="" MEMLIM="500M"
POD_ANNOTATION='cpu.preserve.resource-policy.nri.io/container.pod0c1: "true"
memory.preserve.resource-policy.nri.io/container.pod0c2: "true"'
CONTCOUNT=3 create balloons-busybox

# Print full NRT yaml for debugging
vm-command "$get_nrt -o yaml"

# Verify selected zone attributes
verify-zone-resource "default/pod0/pod0c0" "cpu" "capacity" "4" # balloon's + shared CPUs
verify-zone-resource "default/pod0/pod0c0" "cpu" "allocatable" "750m" # requested CPUs
verify-zone-resource "default/pod0/pod0c0" "cpu" "available" "0" # nothing available on the subzone
verify-zone-resource "default/pod0/pod0c1" "cpu" "capacity" "" # preserve => should not exist

verify-zone-resource "default/pod0/pod0c2" "cpu" "capacity" "4" # expect same balloon as pod0c0
verify-zone-resource "default/pod0/pod0c2" "cpu" "allocatable" "750m"

# Create burstable containers with CPU limits
CPUREQ="200m" MEMREQ="100M" CPULIM="1500m" MEMLIM="500M"
POD_ANNOTATION=''
CONTCOUNT=2 create balloons-busybox

# Print full NRT yaml for debugging
vm-command "$get_nrt -o yaml"

verify-zone-resource "default/pod1/pod1c0" "cpu" "capacity" "1500m" # limit < allowed cpus
verify-zone-attribute "default/pod1/pod1c0" "cpuset" "0-2" # expected fullsocket[1]
