Add a proto for cgroup settings

buildbuddy-io · Nov 15, 2024 · bc61663 · bc61663
1 parent de4208a
commit bc61663
Showing 1 changed file with 130 additions and 0 deletions.
diff --git a/proto/scheduler.proto b/proto/scheduler.proto
@@ -116,6 +116,133 @@ message TaskSize {
   repeated CustomResource custom_resources = 4;
 }
 
+// CgroupSettings carries the Linux cgroup2 options to apply to an execution.
+//
+// Unlimited: where applicable, -1 stands for "unlimited" and is written to
+// cgroup2 as the string "max".
+//
+// Presence: numeric settings come with a companion bool prefixed with "use_".
+// The executor should apply a setting only when that bool is true; when it is
+// false the setting is not applied and the default value is used instead. A
+// "0 means do not set" rule is not used because 0 is a valid value for some
+// settings, and proto3 optional is not used because of the awkward Go API.
+message CgroupSettings {
+  // Limits for a block device; this is the type of block_io_limit.
+  // NOTE(review): no "use_" companions exist for these fields, so 0 cannot be
+  // told apart from unset; confirm how the executor treats 0.
+  message BlockIOLimits {
+    // Maximum read operations per second.
+    int64 riops = 1;
+    // Maximum write operations per second.
+    int64 wiops = 2;
+    // Maximum bytes read per second.
+    int64 rbps = 3;
+    // Maximum bytes written per second.
+    int64 wbps = 4;
+  }
+
+  // Maps to "cpu.weight" in cgroup2. Supported values: 1 to 10000.
+  //
+  // Share of CPU this task receives relative to the other tasks in the parent
+  // cgroup, which gives a best-effort CPU guarantee.
+  int64 cpu_weight = 1;
+  bool use_cpu_weight = 2;
+
+  // Maps to the quota field of "cpu.max" in cgroup2.
+  //
+  // Upper bound on CPU usage within one quota period.
+  int64 cpu_quota_limit_usec = 3;
+  // Maps to the period field of "cpu.max" in cgroup2.
+  //
+  // Interval at which the CPU quota is refreshed. A longer period may permit
+  // higher bursts of CPU usage, but may also cause more stalling when the
+  // quota is used up very early in the period.
+  int64 cpu_quota_period_usec = 4;
+  // NOTE(review): presumably gates both cpu.max fields above; confirm.
+  bool use_cpu_quota = 5;
+
+  // Maps to "cpu.max.burst" in cgroup2.
+  //
+  // CPU time, in usec, that may be "borrowed" from other quota periods to
+  // allow for bursts of CPU usage.
+  int64 cpu_max_burst_usec = 6;
+  bool use_cpu_max_burst = 7;
+
+  // Maps to "cpu.uclamp.min" in cgroup2.
+  //
+  // Requested minimum utilization (protection), expressed as a percentage
+  // rational number: 12.34 means 12.34%.
+  float cpu_uclamp_min = 8;
+  bool use_cpu_uclamp_min = 9;
+
+  // Maps to "cpu.uclamp.max" in cgroup2.
+  //
+  // Requested maximum utilization (limit), expressed as a percentage
+  // rational number: 98.76 means 98.76%.
+  float cpu_uclamp_max = 10;
+  bool use_cpu_uclamp_max = 11;
+
+  // Maps to the "memory.high" field in cgroup2.
+  //
+  // Threshold above which memory usage is throttled and processes come under
+  // heavy reclaim pressure.
+  int64 memory_throttle_limit_bytes = 12;
+  bool use_memory_throttle_limit = 13;
+
+  // Maps to the "memory.max" field in cgroup2.
+  //
+  // Threshold above which the OOM killer kills processes in the cgroup.
+  int64 memory_limit_bytes = 14;
+  bool use_memory_limit = 15;
+
+  // Maps to "memory.low" in cgroup2.
+  //
+  // Best-effort memory protection: while the cgroup and its descendants stay
+  // below this threshold, their memory is reclaimed only if memory cannot be
+  // reclaimed from other, unprotected cgroups.
+  int64 memory_soft_guarantee_bytes = 16;
+  bool use_memory_soft_guarantee = 17;
+
+  // Maps to "memory.min" in cgroup2.
+  //
+  // Guaranteed minimum amount of memory that the system can never reclaim.
+  // The OOM killer is invoked if there is not enough memory to honor this
+  // guarantee.
+  int64 memory_minimum_bytes = 18;
+  bool use_memory_minimum = 19;
+
+  // Maps to "memory.swap.max" in cgroup2.
+  //
+  // Hard limit on anonymous swap memory.
+  int64 swap_limit_bytes = 20;
+  bool use_swap_limit = 21;
+
+  // Maps to "io.latency" in cgroup2. Major/minor device numbers are left out
+  // of this message because they may differ from one executor to another.
+  //
+  // A basic IO quality-of-service mechanism expressed as a single latency
+  // target: the number of microseconds a process can wait before IO from
+  // other processes is given to it.
+  int64 block_io_latency_target_usec = 22;
+  bool use_block_io_latency_target = 23;
+
+  // Maps to "io.weight" in cgroup2. Major/minor device numbers are left out
+  // of this message because they may differ from one executor to another.
+  //
+  // Share of IO time this task receives relative to the other tasks in the
+  // parent cgroup. The weight is applied only to the disk where all action IO
+  // is performed; other IO block devices receive the default weight.
+  // Supported values: 1 to 10000.
+  int64 block_io_weight = 24;
+  bool use_block_io_weight = 25;
+
+  // Maps to "io.max" in cgroup2. Major/minor device numbers are left out of
+  // this message because they may differ from one executor to another.
+  //
+  // IO limit for the block device where all action IO is performed.
+  BlockIOLimits block_io_limit = 26;
+}
+
 // Next ID: 13
 message SchedulingMetadata {
   // Task size used for scheduling purposes, when the scheduler is deciding
@@ -179,6 +306,9 @@ message SchedulingMetadata {
   // priority of tasks belonging to different groups; it only affects the
   // relative priority of tasks within a group.
   int32 priority = 11;
+
+  // cgroup2 settings. Will be set only for Linux executions.
+  CgroupSettings cgroup_settings = 12;
 }
 
 message ScheduleTaskRequest {