-
Notifications
You must be signed in to change notification settings - Fork 144
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: Adding skeleton for oomd cfgen. Tried to abstract the chef model into types and make methods matching the host type and template type hierarchy. Reviewed By: anps77 Differential Revision: D56667715 Privacy Context Container: L1225879 fbshipit-source-id: d369dfc19f1efc5d4ca3abbbd69fe35e222758ca
- Loading branch information
1 parent
f78bcba
commit a139673
Showing
7 changed files
with
468 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Packaging spec for the `fb-oomd-cfgen` package (the fb-oomd config generator).
# It lists two buck2 targets, each with a `path:` under /usr/facebook/fb-oomd/cfgen/.
# NOTE(review): the original nesting/indentation is not visible here; presumably
# each `path:` is the install location of the target listed above it — confirm.
options: | ||
autoclean-keep-versions: 30 | ||
packages: | ||
fb-oomd-cfgen: | ||
packager: resource_control | ||
build_architectures: [x86_64, aarch64] | ||
summary: fb-oomd Config Generator | ||
description: Generates configuration for fb-oomd | ||
rules: | ||
buck2:oomd/cfgen:cfgen: | ||
oomd/cfgen: | ||
path: /usr/facebook/fb-oomd/cfgen/cfgen | ||
buck2:oomd/cfgen:cfgen_differ_config: | ||
oomd/cfgen/cfgen_differ_config/differ_config.json: | ||
path: /usr/facebook/fb-oomd/cfgen/differ_config.json | ||
change_log_paths: | ||
- oomd/cfgen |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. | ||
|
||
#![deny(warnings)] | ||
|
||
mod types; | ||
|
||
use libcfgen::prelude::*; | ||
use types::*; | ||
|
||
fn oomd_json(node: &Node) -> json::JsonValue { | ||
let attrs = get_attributes(node); | ||
default_json_config(&attrs) | ||
// TODO(chengxiong) add other templates | ||
} | ||
|
||
fn oomd_dropin(node: &Node) -> Dropin { | ||
// TODO(chengxiong): implement this | ||
libcfgen::DropinBuilder::new() | ||
.with_recommended_heap_profiling("fb-oomd") | ||
.build(node) | ||
} | ||
|
||
fn default_json_config(attrs: &ConfigParams) -> json::JsonValue { | ||
let mut rulesets = Vec::new(); | ||
rulesets.push(rule_system_overview(attrs)); | ||
rulesets.append(&mut rules_restart_cgroup_on_mem_threshold(attrs)); | ||
// TODO(chengxiong): add more rule sections | ||
json::object! { | ||
"rulesets": rulesets, | ||
} | ||
} | ||
|
||
fn rule_system_overview(attrs: &ConfigParams) -> json::JsonValue { | ||
let cgroup = if [HostType::ShellServer, HostType::OnDemand].contains(&attrs.host_type) { | ||
attrs.oomd2.oomd_target.as_str() | ||
} else { | ||
"workload.slice" | ||
}; | ||
|
||
let mut rule = json::object! { | ||
"name": "system_overview", | ||
"silence-logs": "engine", | ||
"detectors": [ | ||
[ | ||
"records system stats", | ||
{ | ||
"name": "dump_cgroup_overview", | ||
"args": { | ||
"cgroup": cgroup, | ||
} | ||
} | ||
] | ||
], | ||
"actions": [ | ||
{ | ||
"name": "continue", | ||
"args": {}, | ||
} | ||
] | ||
}; | ||
|
||
if attrs.host_type == HostType::OnDemand { | ||
rule["drop-in"] = json::object! { | ||
"detectors": true, | ||
"actions": true, | ||
}; | ||
} | ||
|
||
rule | ||
} | ||
|
||
fn rules_restart_cgroup_on_mem_threshold(attrs: &ConfigParams) -> Vec<json::JsonValue> { | ||
attrs | ||
.oomd2 | ||
.oomd_restart_threshold | ||
.iter() | ||
.map(|(cgroup, params)| { | ||
json::object! { | ||
"name": format!("restart {} on memory threshold", cgroup), | ||
"detectors":[ | ||
[ | ||
"memory usage above", | ||
{ | ||
"name": attrs.oomd2.plugins["memory_above"].as_str(), | ||
"args": { | ||
"cgroup": cgroup.as_str(), | ||
"threshold_anon": params.threshold.as_str(), | ||
"duration": params.duration.as_str(), | ||
} | ||
} | ||
] | ||
], | ||
"actions":[ | ||
{ | ||
"name": "systemd_restart", | ||
"args": { | ||
"service": params.service_name.as_str(), | ||
"post_action_delay": params.post_action_delay.as_str(), | ||
"dry": "false", | ||
} | ||
} | ||
] | ||
} | ||
}) | ||
.collect::<Vec<_>>() | ||
} | ||
|
||
fn get_attributes(node: &Node) -> ConfigParams { | ||
ConfigParams { | ||
host_type: get_host_type(node), | ||
fbtax2: FBTax2Attributes { | ||
blacklisted_jobs: Vec::new(), | ||
on_ssd: on_ssd(node), | ||
io_latency_supported: io_latency_supported(node), | ||
io_cost_supported: false, | ||
disable_swap_protection: false, | ||
workload_high_pressure_threshold: String::from("80"), | ||
workload_high_pressure_duration: String::from("180"), | ||
workload_monitoring_slice: String::from("workload.slice/workload-tw.slice"), | ||
post_workload_kill_delay: None, | ||
oomd_extra_rulesets: Vec::new(), | ||
low_swap_threshold: String::from("10"), | ||
}, | ||
oomd2: Oomd2Attributes { | ||
blacklisted_jobs: Vec::new(), | ||
disable_swap_protection: false, | ||
plugins: convert_args!(btreemap!( | ||
"pressure_above" => "pressure_above", | ||
"pressure_rising_beyond" => "pressure_rising_beyond", | ||
"swap_free" => "swap_free", | ||
"kill_by_memory_size_or_growth" => "kill_by_memory_size_or_growth", | ||
"kill_by_swap_usage" => "kill_by_swap_usage", | ||
"memory_above" => "memory_above", | ||
"memory_reclaim"=> "memory_reclaim", | ||
"senpai" => "senpai", | ||
)), | ||
oomd_dry: true, | ||
oomd_disable_on_drop_in: false, | ||
oomd_target: String::from("system.slice"), | ||
oomd_action_target: String::from("system.slice"), | ||
oomd_high_threshold: String::from("80"), | ||
oomd_high_threshold_duration: String::from("60"), | ||
oomd_threshold: String::from("60"), | ||
oomd_threshold_duration: String::from("90"), | ||
oomd_min_swap_pct: String::from("15"), | ||
oomd_restart_threshold: oomd2_oomd_restart_threshold(), | ||
oomd_reclaim_duation: String::from("10"), | ||
oomd_post_action_delay: String::from("15"), | ||
}, | ||
devserver: DevServerAttributes { | ||
user_mempress: String::from("60"), | ||
system_mempress: String::from("80"), | ||
}, | ||
senpai: SenpaiAttributes { | ||
silence_logs: String::from("engine"), | ||
target: None, | ||
limit_min_bytes: None, | ||
io_pressure_pct: String::from("1.0"), | ||
memory_high_timeout_ms: String::from("20"), | ||
scuba_logger_dataset: String::from("perfpipe_senpai_events"), | ||
}, | ||
} | ||
} | ||
|
||
fn oomd2_oomd_restart_threshold() -> BTreeMap<String, OomdRestartThreshold> { | ||
btreemap! { | ||
String::from("smc_proxy.service") => OomdRestartThreshold{ | ||
threshold: String::from("10G"), | ||
duration: String::from("10"), | ||
post_action_delay: String::from("20"), | ||
service_name: String::from("smc_proxy.service")} | ||
} | ||
} | ||
|
||
fn on_ssd(_node: &Node) -> bool { | ||
true | ||
// TODO(chengxiong): add this logic https://fburl.com/code/dqdu7ves | ||
} | ||
|
||
fn io_latency_supported(_node: &Node) -> bool { | ||
false | ||
// TODO(chengxiong): add this logic https://fburl.com/code/dqdu7ves | ||
} | ||
|
||
fn get_host_type(_node: &Node) -> HostType { | ||
// TODO(chengxiong): add logic to determine host types. | ||
HostType::Default | ||
} | ||
|
||
fn main() -> anyhow::Result<()> { | ||
let mut b = libcfgen::Builder::new(); | ||
b = b.dynamic_json("oomd2.json", |node| Ok(oomd_json(node))); | ||
b = b.dropin(|node| Ok(oomd_dropin(node))); | ||
b.run() | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use libcfgentest::*;
    use rstest::rstest;

    use super::*;

    /// Checks the host type that `get_host_type` reports for a fake node
    /// built from each listed hostname.
    #[rstest]
    #[case::shard99("twshared2434.02.cco1", HostType::Default)]
    fn test_get_host_type(#[case] hostname: &str, #[case] expected: HostType) {
        let fake_node = FakeNodeBuilder::new().hostname(hostname).build();
        let detected = get_host_type(&fake_node);
        assert_eq!(detected, expected);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
use std::collections::BTreeMap; | ||
|
||
use libcfgen::prelude::json::JsonValue; | ||
|
||
/// Host classification used to decide which oomd rules are generated.
///
/// The enum is fieldless, so it also derives `Clone` and `Copy`: values can
/// be passed and compared by value without borrowing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HostType {
    /// The classification used when no more specific host type applies.
    Default,
    /// NOTE(review): presumably a shell-server host — confirm against the
    /// host-detection logic once it is implemented.
    ShellServer,
    /// NOTE(review): presumably an on-demand host — confirm against the
    /// host-detection logic once it is implemented.
    OnDemand,
}
|
||
pub struct ConfigParams { | ||
pub host_type: HostType, | ||
pub fbtax2: FBTax2Attributes, | ||
pub oomd2: Oomd2Attributes, | ||
pub devserver: DevServerAttributes, | ||
pub senpai: SenpaiAttributes, | ||
} | ||
|
||
pub struct FBTax2Attributes { | ||
pub blacklisted_jobs: Vec<String>, | ||
pub on_ssd: bool, | ||
pub io_latency_supported: bool, | ||
pub io_cost_supported: bool, | ||
pub disable_swap_protection: bool, | ||
pub workload_high_pressure_threshold: String, | ||
pub workload_high_pressure_duration: String, | ||
pub workload_monitoring_slice: String, | ||
pub post_workload_kill_delay: Option<String>, | ||
pub oomd_extra_rulesets: Vec<JsonValue>, | ||
pub low_swap_threshold: String, | ||
} | ||
|
||
pub struct Oomd2Attributes { | ||
pub blacklisted_jobs: Vec<String>, | ||
pub disable_swap_protection: bool, | ||
pub plugins: BTreeMap<String, String>, | ||
pub oomd_dry: bool, | ||
pub oomd_disable_on_drop_in: bool, | ||
pub oomd_target: String, | ||
pub oomd_action_target: String, | ||
pub oomd_high_threshold: String, | ||
pub oomd_high_threshold_duration: String, | ||
pub oomd_threshold: String, | ||
pub oomd_threshold_duration: String, | ||
pub oomd_min_swap_pct: String, | ||
pub oomd_restart_threshold: BTreeMap<String, OomdRestartThreshold>, | ||
pub oomd_reclaim_duation: String, | ||
pub oomd_post_action_delay: String, | ||
} | ||
|
||
pub struct DevServerAttributes { | ||
pub user_mempress: String, | ||
pub system_mempress: String, | ||
} | ||
|
||
pub struct SenpaiAttributes { | ||
pub silence_logs: String, | ||
pub target: Option<String>, | ||
pub limit_min_bytes: Option<String>, | ||
pub io_pressure_pct: String, | ||
pub memory_high_timeout_ms: String, | ||
pub scuba_logger_dataset: String, | ||
} | ||
|
||
pub struct OomdRestartThreshold { | ||
pub threshold: String, | ||
pub duration: String, | ||
pub post_action_delay: String, | ||
pub service_name: String, | ||
} |
Oops, something went wrong.