Skip to content

Commit

Permalink
cfgen scaffolding
Browse files Browse the repository at this point in the history
Summary: Adding skeleton for oomd cfgen. Tried to abstract the chef model into types and make methods matching the host type and template type hierarchy.

Reviewed By: anps77

Differential Revision:
D56667715

Privacy Context Container: L1225879

fbshipit-source-id: d369dfc19f1efc5d4ca3abbbd69fe35e222758ca
  • Loading branch information
Chengxiong Ruan authored and facebook-github-bot committed May 16, 2024
1 parent f78bcba commit a139673
Show file tree
Hide file tree
Showing 7 changed files with 468 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/oomd/cfgen/packman.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Packaging spec for the fb-oomd config generator (see `summary` / `description` below).
# NOTE(review): the two `rules` entries appear to map buck2 targets to install paths
# under /usr/facebook/fb-oomd/cfgen -- confirm against the packager's schema.
options:
autoclean-keep-versions: 30
packages:
fb-oomd-cfgen:
packager: resource_control
build_architectures: [x86_64, aarch64]
summary: fb-oomd Config Generator
description: Generates configuration for fb-oomd
rules:
# The cfgen binary itself.
buck2:oomd/cfgen:cfgen:
oomd/cfgen:
path: /usr/facebook/fb-oomd/cfgen/cfgen
# The differ configuration JSON shipped next to the binary.
buck2:oomd/cfgen:cfgen_differ_config:
oomd/cfgen/cfgen_differ_config/differ_config.json:
path: /usr/facebook/fb-oomd/cfgen/differ_config.json
change_log_paths:
- oomd/cfgen
210 changes: 210 additions & 0 deletions src/oomd/cfgen/src/cfgen.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

#![deny(warnings)]

mod types;

use libcfgen::prelude::*;
use types::*;

/// Renders the oomd JSON configuration for `node`.
///
/// The node's attributes are resolved via `get_attributes` and handed to the
/// default template; no other template is selected yet.
fn oomd_json(node: &Node) -> json::JsonValue {
    // TODO(chengxiong) add other templates
    default_json_config(&get_attributes(node))
}

/// Builds the drop-in for `node`.
///
/// Currently this only enables the builder's recommended heap profiling for
/// the `fb-oomd` service.
fn oomd_dropin(node: &Node) -> Dropin {
    // TODO(chengxiong): implement this
    let service = "fb-oomd";
    libcfgen::DropinBuilder::new()
        .with_recommended_heap_profiling(service)
        .build(node)
}

/// Assembles the default oomd configuration document from `attrs`.
///
/// The resulting object has a single `"rulesets"` key whose array starts with
/// the system-overview ruleset, followed by one restart ruleset per configured
/// restart threshold.
fn default_json_config(attrs: &ConfigParams) -> json::JsonValue {
    // The overview ruleset always comes first; the per-cgroup rules follow it.
    let mut rulesets = vec![rule_system_overview(attrs)];
    rulesets.extend(rules_restart_cgroup_on_mem_threshold(attrs));
    // TODO(chengxiong): add more rule sections
    json::object! {
        "rulesets": rulesets,
    }
}

/// Builds the `system_overview` ruleset, which dumps a cgroup overview and
/// then continues to the next ruleset.
///
/// Shell-server and on-demand hosts monitor `attrs.oomd2.oomd_target`; every
/// other host type monitors `workload.slice`. On-demand hosts additionally get
/// a `"drop-in"` section enabling both detectors and actions.
fn rule_system_overview(attrs: &ConfigParams) -> json::JsonValue {
    let monitored_cgroup = match attrs.host_type {
        HostType::ShellServer | HostType::OnDemand => attrs.oomd2.oomd_target.as_str(),
        HostType::Default => "workload.slice",
    };

    let mut ruleset = json::object! {
        "name": "system_overview",
        "silence-logs": "engine",
        "detectors": [
            [
                "records system stats",
                {
                    "name": "dump_cgroup_overview",
                    "args": {
                        "cgroup": monitored_cgroup,
                    }
                }
            ]
        ],
        "actions": [
            {
                "name": "continue",
                "args": {},
            }
        ]
    };

    // Only on-demand hosts get the drop-in section on this ruleset.
    if matches!(attrs.host_type, HostType::OnDemand) {
        ruleset["drop-in"] = json::object! {
            "detectors": true,
            "actions": true,
        };
    }

    ruleset
}

/// Produces one "restart on memory threshold" ruleset per entry of
/// `attrs.oomd2.oomd_restart_threshold`, in the map's iteration order.
///
/// Each ruleset pairs a memory-usage detector (plugin name taken from
/// `attrs.oomd2.plugins["memory_above"]`) with a `systemd_restart` action for
/// the entry's service.
///
/// # Panics
///
/// Panics if at least one restart threshold is configured and `plugins` has no
/// `"memory_above"` entry (map indexing on a missing key).
fn rules_restart_cgroup_on_mem_threshold(attrs: &ConfigParams) -> Vec<json::JsonValue> {
    let thresholds = &attrs.oomd2.oomd_restart_threshold;
    let mut rulesets = Vec::with_capacity(thresholds.len());

    for (cgroup, params) in thresholds {
        let ruleset = json::object! {
            "name": format!("restart {} on memory threshold", cgroup),
            "detectors": [
                [
                    "memory usage above",
                    {
                        "name": attrs.oomd2.plugins["memory_above"].as_str(),
                        "args": {
                            "cgroup": cgroup.as_str(),
                            "threshold_anon": params.threshold.as_str(),
                            "duration": params.duration.as_str(),
                        }
                    }
                ]
            ],
            "actions": [
                {
                    "name": "systemd_restart",
                    "args": {
                        "service": params.service_name.as_str(),
                        "post_action_delay": params.post_action_delay.as_str(),
                        "dry": "false",
                    }
                }
            ]
        };
        rulesets.push(ruleset);
    }

    rulesets
}

/// Builds the full set of template parameters for `node`.
///
/// Only `host_type`, `fbtax2.on_ssd` and `fbtax2.io_latency_supported` are
/// derived from `node` (through helper functions); every other value is a
/// hard-coded default.
fn get_attributes(node: &Node) -> ConfigParams {
    let host_type = get_host_type(node);

    let fbtax2 = FBTax2Attributes {
        blacklisted_jobs: vec![],
        on_ssd: on_ssd(node),
        io_latency_supported: io_latency_supported(node),
        io_cost_supported: false,
        disable_swap_protection: false,
        workload_high_pressure_threshold: "80".to_owned(),
        workload_high_pressure_duration: "180".to_owned(),
        workload_monitoring_slice: "workload.slice/workload-tw.slice".to_owned(),
        post_workload_kill_delay: None,
        oomd_extra_rulesets: vec![],
        low_swap_threshold: "10".to_owned(),
    };

    // Each logical plugin currently maps to a plugin of the same name.
    let plugins = convert_args!(btreemap!(
        "pressure_above" => "pressure_above",
        "pressure_rising_beyond" => "pressure_rising_beyond",
        "swap_free" => "swap_free",
        "kill_by_memory_size_or_growth" => "kill_by_memory_size_or_growth",
        "kill_by_swap_usage" => "kill_by_swap_usage",
        "memory_above" => "memory_above",
        "memory_reclaim" => "memory_reclaim",
        "senpai" => "senpai",
    ));

    let oomd2 = Oomd2Attributes {
        blacklisted_jobs: vec![],
        disable_swap_protection: false,
        plugins,
        oomd_dry: true,
        oomd_disable_on_drop_in: false,
        oomd_target: "system.slice".to_owned(),
        oomd_action_target: "system.slice".to_owned(),
        oomd_high_threshold: "80".to_owned(),
        oomd_high_threshold_duration: "60".to_owned(),
        oomd_threshold: "60".to_owned(),
        oomd_threshold_duration: "90".to_owned(),
        oomd_min_swap_pct: "15".to_owned(),
        oomd_restart_threshold: oomd2_oomd_restart_threshold(),
        oomd_reclaim_duation: "10".to_owned(),
        oomd_post_action_delay: "15".to_owned(),
    };

    let devserver = DevServerAttributes {
        user_mempress: "60".to_owned(),
        system_mempress: "80".to_owned(),
    };

    let senpai = SenpaiAttributes {
        silence_logs: "engine".to_owned(),
        target: None,
        limit_min_bytes: None,
        io_pressure_pct: "1.0".to_owned(),
        memory_high_timeout_ms: "20".to_owned(),
        scuba_logger_dataset: "perfpipe_senpai_events".to_owned(),
    };

    ConfigParams {
        host_type,
        fbtax2,
        oomd2,
        devserver,
        senpai,
    }
}

fn oomd2_oomd_restart_threshold() -> BTreeMap<String, OomdRestartThreshold> {
btreemap! {
String::from("smc_proxy.service") => OomdRestartThreshold{
threshold: String::from("10G"),
duration: String::from("10"),
post_action_delay: String::from("20"),
service_name: String::from("smc_proxy.service")}
}
}

/// Reports whether the node is on SSD.
///
/// Placeholder: the node is ignored and `true` is always returned until the
/// real detection logic is added.
fn on_ssd(_node: &Node) -> bool {
true
// TODO(chengxiong): add this logic https://fburl.com/code/dqdu7ves
}

/// Reports whether io.latency is supported on the node.
///
/// Placeholder: the node is ignored and `false` is always returned until the
/// real detection logic is added.
fn io_latency_supported(_node: &Node) -> bool {
false
// TODO(chengxiong): add this logic https://fburl.com/code/dqdu7ves
}

/// Classifies the node into a `HostType`.
///
/// Placeholder: the node is ignored and every host is reported as
/// `HostType::Default`.
fn get_host_type(_node: &Node) -> HostType {
// TODO(chengxiong): add logic to determine host types.
HostType::Default
}

/// Entry point: registers the `oomd2.json` generator and the drop-in generator
/// with the cfgen builder, then runs it.
fn main() -> anyhow::Result<()> {
    libcfgen::Builder::new()
        .dynamic_json("oomd2.json", |node| Ok(oomd_json(node)))
        .dropin(|node| Ok(oomd_dropin(node)))
        .run()
}

// Unit tests for host classification, driven by fake nodes.
#[cfg(test)]
mod tests {
use libcfgentest::*;
use rstest::rstest;

use super::*;

// Table-driven: each `#[case]` pairs a hostname with the host type expected for it.
// Only the default classification exists so far.
#[rstest]
#[case::shard99("twshared2434.02.cco1", HostType::Default)]
fn test_get_host_type(#[case] hostname: &str, #[case] expected: HostType) {
let node = FakeNodeBuilder::new().hostname(hostname).build();
assert_eq!(get_host_type(&node), expected);
}
}
71 changes: 71 additions & 0 deletions src/oomd/cfgen/src/types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use std::collections::BTreeMap;

use libcfgen::prelude::json::JsonValue;

/// Class of host a configuration is generated for.
///
/// The variant selects host-specific parts of the generated rulesets.
/// It is a fieldless enum, so it is `Copy` and can be passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HostType {
    /// Any host without a more specific classification.
    Default,
    /// Shell server host.
    ShellServer,
    /// On-demand host.
    OnDemand,
}

/// Every input the configuration templates read, grouped by the area it
/// belongs to.
pub struct ConfigParams {
/// Host classification used to pick host-specific rule variants.
pub host_type: HostType,
/// FBTax2-related settings.
pub fbtax2: FBTax2Attributes,
/// Core oomd settings (targets, thresholds, plugin names, restart rules).
pub oomd2: Oomd2Attributes,
/// Devserver-specific settings.
pub devserver: DevServerAttributes,
/// Senpai-related settings.
pub senpai: SenpaiAttributes,
}

/// FBTax2-related template parameters.
///
/// Numeric-looking settings are carried as strings.
// NOTE(review): units of the threshold/duration fields are not visible here
// (presumably percent and seconds) -- confirm before documenting them.
pub struct FBTax2Attributes {
pub blacklisted_jobs: Vec<String>,
/// Whether the host is on SSD.
pub on_ssd: bool,
/// Whether io.latency is supported on the host.
pub io_latency_supported: bool,
pub io_cost_supported: bool,
pub disable_swap_protection: bool,
pub workload_high_pressure_threshold: String,
pub workload_high_pressure_duration: String,
/// Cgroup path of the slice monitored for workload pressure.
pub workload_monitoring_slice: String,
/// Optional delay after a workload kill; `None` when unset.
pub post_workload_kill_delay: Option<String>,
/// Additional, already-rendered rulesets.
pub oomd_extra_rulesets: Vec<JsonValue>,
pub low_swap_threshold: String,
}

/// Core oomd template parameters.
///
/// Numeric-looking settings are carried as strings.
// NOTE(review): units of the threshold/duration fields are not visible here
// (presumably percent and seconds) -- confirm before documenting them.
pub struct Oomd2Attributes {
pub blacklisted_jobs: Vec<String>,
pub disable_swap_protection: bool,
/// Maps a logical plugin key (e.g. `"memory_above"`) to the plugin name
/// emitted into the generated config.
pub plugins: BTreeMap<String, String>,
pub oomd_dry: bool,
pub oomd_disable_on_drop_in: bool,
/// Cgroup that oomd monitors.
pub oomd_target: String,
pub oomd_action_target: String,
pub oomd_high_threshold: String,
pub oomd_high_threshold_duration: String,
pub oomd_threshold: String,
pub oomd_threshold_duration: String,
pub oomd_min_swap_pct: String,
/// Per-cgroup restart rules, keyed by cgroup name.
pub oomd_restart_threshold: BTreeMap<String, OomdRestartThreshold>,
// NOTE(review): the name is misspelled ("duation" for "duration"); it is kept
// as-is because code outside this struct refers to the field by this name.
pub oomd_reclaim_duation: String,
pub oomd_post_action_delay: String,
}

/// Devserver-specific template parameters.
///
/// Values are carried as strings.
// NOTE(review): presumably memory-pressure thresholds in percent -- confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DevServerAttributes {
    /// Memory-pressure setting for the user side.
    pub user_mempress: String,
    /// Memory-pressure setting for the system side.
    pub system_mempress: String,
}

/// Senpai-related template parameters.
///
/// Numeric-looking settings are carried as strings; optional settings are
/// `None` when unset.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SenpaiAttributes {
    /// Value for the ruleset's log-silencing option.
    pub silence_logs: String,
    /// Optional target; `None` when unset.
    pub target: Option<String>,
    /// Optional lower bound in bytes; `None` when unset.
    pub limit_min_bytes: Option<String>,
    pub io_pressure_pct: String,
    pub memory_high_timeout_ms: String,
    /// Name of the dataset that events are logged to.
    pub scuba_logger_dataset: String,
}

/// Parameters of one "restart a service when its cgroup exceeds a memory
/// threshold" rule.
///
/// All values are carried as strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OomdRestartThreshold {
    /// Memory threshold that triggers the restart (e.g. `"10G"`).
    pub threshold: String,
    /// How long the threshold must be exceeded before the rule fires.
    pub duration: String,
    /// Delay applied after the restart action.
    pub post_action_delay: String,
    /// Name of the service to restart.
    pub service_name: String,
}
Loading

0 comments on commit a139673

Please sign in to comment.