Skip to content

Commit

Permalink
Add a proto for cgroup settings
Browse files Browse the repository at this point in the history
  • Loading branch information
bduffany committed Nov 15, 2024
1 parent de4208a commit bc61663
Showing 1 changed file with 130 additions and 0 deletions.
130 changes: 130 additions & 0 deletions proto/scheduler.proto
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,133 @@ message TaskSize {
repeated CustomResource custom_resources = 4;
}

// CgroupSettings describes the Linux cgroup2 configuration to use for an
// execution.
//
// For settings that support it, -1 stands for "unlimited" and corresponds to
// the literal string "max" in cgroup2.
//
// Presence convention: each numeric field has a companion bool whose name
// starts with "use_". The executor applies the numeric value only when that
// bool is true; otherwise the setting is not applied and the default value is
// used. A 0-means-unset sentinel is not used because 0 is a valid value for
// some settings, and proto3 optional is not used because of the awkward Go
// API.
message CgroupSettings {
  // Limits for a block device, as referenced by block_io_limit.
  //
  // NOTE(review): these numeric fields have no "use_" companions, unlike the
  // numeric fields of the enclosing message; confirm how an unset limit is
  // meant to be expressed.
  message BlockIOLimits {
    // Read operations per second, at most.
    int64 riops = 1;
    // Write operations per second, at most.
    int64 wiops = 2;
    // Bytes read per second, at most.
    int64 rbps = 3;
    // Bytes written per second, at most.
    int64 wbps = 4;
  }

  // cgroup2 "cpu.weight": this task's share of CPU relative to the other
  // tasks in the parent cgroup, giving a best-effort CPU guarantee.
  //
  // Supported range: 1 to 10000.
  int64 cpu_weight = 1;
  bool use_cpu_weight = 2;

  // Quota field of cgroup2 "cpu.max": the maximum CPU usage permitted within
  // one quota period.
  int64 cpu_quota_limit_usec = 3;
  // Period field of cgroup2 "cpu.max": the interval at which the CPU quota is
  // refreshed. A longer period may permit higher burst CPU usage, but may also
  // cause more stalling when the quota runs out very early in the period.
  int64 cpu_quota_period_usec = 4;
  bool use_cpu_quota = 5;

  // cgroup2 "cpu.max.burst": CPU time, in usec, that may be "borrowed" from
  // other quota periods to accommodate bursts of CPU usage.
  int64 cpu_max_burst_usec = 6;
  bool use_cpu_max_burst = 7;

  // cgroup2 "cpu.uclamp.min": the requested minimum utilization (protection),
  // written as a percentage rational number; 12.34 means 12.34%.
  float cpu_uclamp_min = 8;
  bool use_cpu_uclamp_min = 9;

  // cgroup2 "cpu.uclamp.max": the requested maximum utilization (limit),
  // written as a percentage rational number; 98.76 means 98.76%.
  float cpu_uclamp_max = 10;
  bool use_cpu_uclamp_max = 11;

  // cgroup2 "memory.high": once memory usage passes this limit it is
  // throttled and processes come under heavy reclaim pressure.
  int64 memory_throttle_limit_bytes = 12;
  bool use_memory_throttle_limit = 13;

  // cgroup2 "memory.max": once this limit is passed, the OOM killer kills
  // processes in the cgroup.
  int64 memory_limit_bytes = 14;
  bool use_memory_limit = 15;

  // cgroup2 "memory.low": best-effort memory protection. While the cgroup and
  // its descendants stay below this threshold, their memory is not reclaimed
  // unless memory cannot be reclaimed from other, unprotected cgroups.
  int64 memory_soft_guarantee_bytes = 16;
  bool use_memory_soft_guarantee = 17;

  // cgroup2 "memory.min": guaranteed minimum memory which the system can
  // never reclaim. The OOM killer is invoked if there is not enough memory to
  // honor this guarantee.
  int64 memory_minimum_bytes = 18;
  bool use_memory_minimum = 19;

  // cgroup2 "memory.swap.max": hard limit on anonymous swap memory.
  int64 swap_limit_bytes = 20;
  bool use_swap_limit = 21;

  // cgroup2 "io.latency": a basic IO quality-of-service mechanism expressed as
  // a single latency target, namely the number of microseconds a process can
  // wait before IO from other processes is given to it.
  //
  // Major/minor device numbers are deliberately absent because they may differ
  // from one executor to another.
  int64 block_io_latency_target_usec = 22;
  bool use_block_io_latency_target = 23;

  // cgroup2 "io.weight": this task's share of IO time relative to the other
  // tasks in the parent cgroup. The weight applies only to the disk where all
  // action IO is performed; other IO block devices get the default weight.
  //
  // Supported range: 1 to 10000.
  //
  // Major/minor device numbers are deliberately absent because they may differ
  // from one executor to another.
  int64 block_io_weight = 24;
  bool use_block_io_weight = 25;

  // cgroup2 "io.max": IO limit for the block device where all action IO is
  // performed.
  //
  // Major/minor device numbers are deliberately absent because they may differ
  // from one executor to another.
  BlockIOLimits block_io_limit = 26;
}

// Next ID: 13
message SchedulingMetadata {
// Task size used for scheduling purposes, when the scheduler is deciding
Expand Down Expand Up @@ -179,6 +306,9 @@ message SchedulingMetadata {
// priority of tasks belonging to different groups; it only affects the
// relative priority of tasks within a group.
int32 priority = 11;

// cgroup2 settings. Will be set only for Linux executions.
CgroupSettings cgroup_settings = 12;
}

message ScheduleTaskRequest {
Expand Down

0 comments on commit bc61663

Please sign in to comment.