From aa6a8b5a3e91a3634bf20812ef0561797744248f Mon Sep 17 00:00:00 2001 From: Matthew Hielsberg Date: Tue, 19 Sep 2023 13:31:52 -0500 Subject: [PATCH] Initial PSI commit --- .../template_linux_pressure/6.0/README.md | 96 +++ .../6.0/template_linux_pressure.yaml | 657 ++++++++++++++++++ 2 files changed, 753 insertions(+) create mode 100644 Operating_Systems/Linux/template_linux_pressure/6.0/README.md create mode 100644 Operating_Systems/Linux/template_linux_pressure/6.0/template_linux_pressure.yaml diff --git a/Operating_Systems/Linux/template_linux_pressure/6.0/README.md b/Operating_Systems/Linux/template_linux_pressure/6.0/README.md new file mode 100644 index 000000000..c51a83ce4 --- /dev/null +++ b/Operating_Systems/Linux/template_linux_pressure/6.0/README.md @@ -0,0 +1,96 @@ +# Linux - Pressure Stall Information + +## Overview + +Self-contained template for monitoring pressure stall information on Linux systems. Source: +## Author + +Matthew Hielsberg - + +## Macros used + +|Name|Description|Default|Type| +|----|-----------|-------|----| +|{$CPU_FULL_AVG10_THRESH}|CPU starvation for ALL processes over 10 seconds|0|Integer| +|{$CPU_FULL_AVG60_THRESH}|CPU starvation for ALL processes over 60 seconds|0|Integer| +|{$CPU_FULL_AVG300_THRESH}|CPU starvation for ALL processes over 300 seconds|0|Integer| +|{$CPU_SOME_AVG10_THRESH}|CPU starvation for some processes over 10 seconds|75|Integer| +|{$CPU_SOME_AVG60_THRESH}|CPU starvation for some processes over 60 seconds|50|Integer| +|{$CPU_SOME_AVG300_THRESH}|CPU starvation for some processes over 300 seconds|25|Integer| +|{$IO_FULL_AVG10_THRESH}|IO starvation for ALL processes over 10 seconds|10|Integer| +|{$IO_FULL_AVG60_THRESH}|IO starvation for ALL processes over 60 seconds|5|Integer| +|{$IO_FULL_AVG300_THRESH}|IO starvation for ALL processes over 300 seconds|1|Integer| +|{$IO_SOME_AVG10_THRESH}|IO starvation for some processes over 10 seconds|50|Integer| +|{$IO_SOME_AVG60_THRESH}|IO starvation for some processes over 60 
seconds|10|Integer| +|{$IO_SOME_AVG300_THRESH}|IO starvation for some processes over 300 seconds|5|Integer| +|{$MEMORY_FULL_AVG10_THRESH}|Memory starvation for ALL processes over 10 seconds|10|Integer| +|{$MEMORY_FULL_AVG60_THRESH}|Memory starvation for ALL processes over 60 seconds|5|Integer| +|{$MEMORY_FULL_AVG300_THRESH}|Memory starvation for ALL processes over 300 seconds|1|Integer| +|{$MEMORY_SOME_AVG10_THRESH}|Memory starvation for some processes over 10 seconds|50|Integer| +|{$MEMORY_SOME_AVG60_THRESH}|Memory starvation for some processes over 60 seconds|10|Integer| +|{$MEMORY_SOME_AVG300_THRESH}|Memory starvation for some processes over 300 seconds|5|Integer| + +## Template links + +There are no template links in this template. + +## Discovery rules + +There are no discovery rules in this template. + +## Items collected + +|Name|Description|Type|Key and additional info| +|----|-----------|----|-----------------------| +|CPU Pressure Stall Information - Text|Service item for gathering cpu 'some' and 'full' pressure (10s,60s,300s)|TEXT|key: vfs.file.contents[/proc/pressure/cpu]| +|CPU Pressure Stall Information - Full - 10s Average|The percentage of time all tasks were stalled on the CPU over the last 10s window.|FLOAT|key: psi_mth.cpu.full.avg10, master_item key: vfs.file.contents[/proc/pressure/cpu]| +|CPU Pressure Stall Information - Full - 60s Average|The percentage of time all tasks were stalled on the CPU over the last 60s window.|FLOAT|key: psi_mth.cpu.full.avg60, master_item key: vfs.file.contents[/proc/pressure/cpu]| +|CPU Pressure Stall Information - Full - 300s Average|The percentage of time all tasks were stalled on the CPU over the last 300s window.|FLOAT|key: psi_mth.cpu.full.avg300, master_item key: vfs.file.contents[/proc/pressure/cpu]| +|CPU Pressure Stall Information - Some - 10s Average|The percentage of time some tasks were stalled on the CPU over the last 10s window.|FLOAT|key: psi_mth.cpu.some.avg10, master_item key: 
vfs.file.contents[/proc/pressure/cpu]| +|CPU Pressure Stall Information - Some - 60s Average|The percentage of time some tasks were stalled on the CPU over the last 60s window.|FLOAT|key: psi_mth.cpu.some.avg60, master_item key: vfs.file.contents[/proc/pressure/cpu]| +|CPU Pressure Stall Information - Some - 300s Average|The percentage of time some tasks were stalled on the CPU over the last 300s window.|FLOAT|key: psi_mth.cpu.some.avg300, master_item key: vfs.file.contents[/proc/pressure/cpu]| +|IO Pressure Stall Information - Text|Service item for gathering io 'some' and 'full' pressure (10s,60s,300s)|TEXT|key: vfs.file.contents[/proc/pressure/io]| +|IO Pressure Stall Information - Full - 10s Average|The percentage of time all tasks were waiting on IO over the last 10s window.|FLOAT|key: psi_mth.io.full.avg10, master_item key: vfs.file.contents[/proc/pressure/io]| +|IO Pressure Stall Information - Full - 60s Average|The percentage of time all tasks were waiting on IO over the last 60s window.|FLOAT|key: psi_mth.io.full.avg60, master_item key: vfs.file.contents[/proc/pressure/io]| +|IO Pressure Stall Information - Full - 300s Average|The percentage of time all tasks were waiting on IO over the last 300s window.|FLOAT|key: psi_mth.io.full.avg300, master_item key: vfs.file.contents[/proc/pressure/io]| +|IO Pressure Stall Information - Some - 10s Average|The percentage of time some tasks were waiting on IO over the last 10s window.|FLOAT|key: psi_mth.io.some.avg10, master_item key: vfs.file.contents[/proc/pressure/io]| +|IO Pressure Stall Information - Some - 60s Average|The percentage of time some tasks were waiting on IO over the last 60s window.|FLOAT|key: psi_mth.io.some.avg60, master_item key: vfs.file.contents[/proc/pressure/io]| +|IO Pressure Stall Information - Some - 300s Average|The percentage of time some tasks were waiting on IO over the last 300s window.|FLOAT|key: psi_mth.io.some.avg300, master_item key: vfs.file.contents[/proc/pressure/io]| 
+|Memory Pressure Stall Information - Text|Service item for gathering memory 'some' and 'full' pressure (10s,60s,300s)|TEXT|key: vfs.file.contents[/proc/pressure/memory]| +|Memory Pressure Stall Information - Full - 10s Average|The percentage of time all tasks were waiting on memory over the last 10s window.|FLOAT|key: psi_mth.memory.full.avg10, master_item key: vfs.file.contents[/proc/pressure/memory]| +|Memory Pressure Stall Information - Full - 60s Average|The percentage of time all tasks were waiting on memory over the last 60s window.|FLOAT|key: psi_mth.memory.full.avg60, master_item key: vfs.file.contents[/proc/pressure/memory]| +|Memory Pressure Stall Information - Full - 300s Average|The percentage of time all tasks were waiting on memory over the last 300s window.|FLOAT|key: psi_mth.memory.full.avg300, master_item key: vfs.file.contents[/proc/pressure/memory]| +|Memory Pressure Stall Information - Some - 10s Average|The percentage of time some tasks were waiting on memory over the last 10s window.|FLOAT|key: psi_mth.memory.some.avg10, master_item key: vfs.file.contents[/proc/pressure/memory]| +|Memory Pressure Stall Information - Some - 60s Average|The percentage of time some tasks were waiting on memory over the last 60s window.|FLOAT|key: psi_mth.memory.some.avg60, master_item key: vfs.file.contents[/proc/pressure/memory]| +|Memory Pressure Stall Information - Some - 300s Average|The percentage of time some tasks were waiting on memory over the last 300s window.|FLOAT|key: psi_mth.memory.some.avg300, master_item key: vfs.file.contents[/proc/pressure/memory]| + +## Triggers + +|Name|Description|Expression|Priority| +|----|-----------|----------|--------| +|Linux PSI - CPU Full Avg 10 - Exceeds Threshold|The percentage of time all tasks were stalled on the CPU over the last 10s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.full.avg10)>{$CPU_FULL_AVG10_THRESH}|WARNING| +|Linux PSI - CPU Full Avg 60 - Exceeds Threshold|The 
percentage of time all tasks were stalled on the CPU over the last 60s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.full.avg60)>{$CPU_FULL_AVG60_THRESH}|WARNING| +|Linux PSI - CPU Full Avg 300 - Exceeds Threshold|The percentage of time all tasks were stalled on the CPU over the last 300s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.full.avg300)>{$CPU_FULL_AVG300_THRESH}|WARNING| +|Linux PSI - CPU Some Avg 10 - Exceeds Threshold|The percentage of time some tasks were stalled on the CPU over the last 10s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.some.avg10)>{$CPU_SOME_AVG10_THRESH}|INFO| +|Linux PSI - CPU Some Avg 60 - Exceeds Threshold|The percentage of time some tasks were stalled on the CPU over the last 60s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.some.avg60)>{$CPU_SOME_AVG60_THRESH}|INFO| +|Linux PSI - CPU Some Avg 300 - Exceeds Threshold|The percentage of time some tasks were stalled on the CPU over the last 300s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.some.avg300)>{$CPU_SOME_AVG300_THRESH}|INFO| +|Linux PSI - IO Full Avg 10 - Exceeds Threshold|The percentage of time all tasks were waiting on IO over the last 10s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.io.full.avg10)>{$IO_FULL_AVG10_THRESH}|INFO| +|Linux PSI - IO Full Avg 60 - Exceeds Threshold|The percentage of time all tasks were waiting on IO over the last 60s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.io.full.avg60)>{$IO_FULL_AVG60_THRESH}|INFO| +|Linux PSI - IO Full Avg 300 - Exceeds Threshold|The percentage of time all tasks were waiting on IO over the last 300s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.io.full.avg300)>{$IO_FULL_AVG300_THRESH}|INFO| +|Linux PSI - IO Some 
Avg 10 - Exceeds Threshold|The percentage of time some tasks were waiting on IO over the last 10s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.io.some.avg10)>{$IO_SOME_AVG10_THRESH}|INFO| +|Linux PSI - IO Some Avg 60 - Exceeds Threshold|The percentage of time some tasks were waiting on IO over the last 60s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.io.some.avg60)>{$IO_SOME_AVG60_THRESH}|INFO| +|Linux PSI - IO Some Avg 300 - Exceeds Threshold|The percentage of time some tasks were waiting on IO over the last 300s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.io.some.avg300)>{$IO_SOME_AVG300_THRESH}|INFO| +|Linux PSI - Memory Full Avg 10 - Exceeds Threshold|The percentage of time all tasks were waiting on memory over the last 10s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.memory.full.avg10)>{$MEMORY_FULL_AVG10_THRESH}|INFO| +|Linux PSI - Memory Full Avg 60 - Exceeds Threshold|The percentage of time all tasks were waiting on memory over the last 60s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.memory.full.avg60)>{$MEMORY_FULL_AVG60_THRESH}|INFO| +|Linux PSI - Memory Full Avg 300 - Exceeds Threshold|The percentage of time all tasks were waiting on memory over the last 300s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.memory.full.avg300)>{$MEMORY_FULL_AVG300_THRESH}|INFO| +|Linux PSI - Memory Some Avg 10 - Exceeds Threshold|The percentage of time some tasks were waiting on memory over the last 10s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.memory.some.avg10)>{$MEMORY_SOME_AVG10_THRESH}|INFO| +|Linux PSI - Memory Some Avg 60 - Exceeds Threshold|The percentage of time some tasks were waiting on memory over the last 60s window exceeds the threshold|last(/Linux Pressure Stall Information - 
PSI/psi_mth.memory.some.avg60)>{$MEMORY_SOME_AVG60_THRESH}|INFO| +|Linux PSI - Memory Some Avg 300 - Exceeds Threshold|The percentage of time some tasks were waiting on memory over the last 300s window exceeds the threshold|last(/Linux Pressure Stall Information - PSI/psi_mth.memory.some.avg300)>{$MEMORY_SOME_AVG300_THRESH}|INFO| + +## Graphs + +Three graphs are included, each of which contains the 10, 60 and 300 second averages for both 'some' and 'full'. + +- CPU Pressure Stall Information +- IO Pressure Stall Information +- Memory Pressure Stall Information diff --git a/Operating_Systems/Linux/template_linux_pressure/6.0/template_linux_pressure.yaml b/Operating_Systems/Linux/template_linux_pressure/6.0/template_linux_pressure.yaml new file mode 100644 index 000000000..cc73870d7 --- /dev/null +++ b/Operating_Systems/Linux/template_linux_pressure/6.0/template_linux_pressure.yaml @@ -0,0 +1,657 @@ +zabbix_export: + version: '6.0' + date: '2023-09-19T18:02:09Z' + groups: + - uuid: 846977d1dfed4968bc5f8bdb363285bc + name: 'Templates/Operating systems' + templates: + - uuid: fc3089e96aa34a9fa86fe178b7d2c9c9 + template: 'Linux Pressure Stall Information - PSI' + name: 'Linux Pressure Stall Information - PSI' + description: 'Provides access to the pressure stall info for cpu, memory and io.' 
+ groups: + - name: 'Templates/Operating systems' + items: + - uuid: 4cd8a50ec30c4293b7cd1fcf3eca177e + name: 'CPU Pressure Stall Information - Full - 10s Average' + type: DEPENDENT + key: psi_mth.cpu.full.avg10 + delay: '0' + value_type: FLOAT + units: '%' + preprocessing: + - type: REGEX + parameters: + - 'full.*avg10=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/cpu]' + tags: + - tag: component + value: cpu + triggers: + - uuid: df001b00ac534034b01492c5c3e9bd06 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.full.avg10)>{$CPU_FULL_AVG10_THRESH}' + name: 'Linux PSI - CPU Full Avg 10 - Exceeds Threshold' + priority: WARNING + manual_close: 'YES' + - uuid: c3dfbeddb44443029dce060bed67631a + name: 'CPU Pressure Stall Information - Full - 60s Average' + type: DEPENDENT + key: psi_mth.cpu.full.avg60 + delay: '0' + value_type: FLOAT + units: '%' + preprocessing: + - type: REGEX + parameters: + - 'full.*avg60=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/cpu]' + tags: + - tag: component + value: cpu + triggers: + - uuid: fb184f6145c8488e8390f5915953ca23 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.full.avg60)>{$CPU_FULL_AVG60_THRESH}' + name: 'Linux PSI - CPU Full Avg 60 - Exceeds Threshold' + priority: WARNING + manual_close: 'YES' + - uuid: 23ac883311cd4ae69f1b789061cba4ce + name: 'CPU Pressure Stall Information - Full - 300s Average' + type: DEPENDENT + key: psi_mth.cpu.full.avg300 + delay: '0' + value_type: FLOAT + units: '%' + preprocessing: + - type: REGEX + parameters: + - 'full.*avg300=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/cpu]' + tags: + - tag: component + value: cpu + triggers: + - uuid: 0c1f1b5713ba40efa9f279a39d3c4ab8 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.full.avg300)>{$CPU_FULL_AVG300_THRESH}' + name: 'Linux PSI - CPU Full Avg 300 - Exceeds Threshold' + priority: WARNING + 
manual_close: 'YES' + - uuid: a4d8b637aa3641be95cacc5fad76e322 + name: 'CPU Pressure Stall Information - Some - 10s Average' + type: DEPENDENT + key: psi_mth.cpu.some.avg10 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time some tasks were stalled on the CPU over the last 10s window.' + preprocessing: + - type: REGEX + parameters: + - 'some.*avg10=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/cpu]' + tags: + - tag: component + value: cpu + triggers: + - uuid: 25ed5e9c97b84f9092bb164ae005de18 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.some.avg10)>{$CPU_SOME_AVG10_THRESH}' + name: 'Linux PSI - CPU Some Avg 10 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: 3e46db2cdb8a4410ad440c9458976d45 + name: 'CPU Pressure Stall Information - Some - 60s Average' + type: DEPENDENT + key: psi_mth.cpu.some.avg60 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time some tasks were stalled on the CPU over the last 60s window.' + preprocessing: + - type: REGEX + parameters: + - 'some.*avg60=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/cpu]' + tags: + - tag: component + value: cpu + triggers: + - uuid: d3ae497d2c08420291bde750fa06c91e + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.some.avg60)>{$CPU_SOME_AVG60_THRESH}' + name: 'Linux PSI - CPU Some Avg 60 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: 3800255d9f674f1582f2edc3b84d516d + name: 'CPU Pressure Stall Information - Some - 300s Average' + type: DEPENDENT + key: psi_mth.cpu.some.avg300 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time some tasks were stalled on the CPU over the last 300s window.' 
+ preprocessing: + - type: REGEX + parameters: + - 'some.*avg300=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/cpu]' + tags: + - tag: component + value: cpu + triggers: + - uuid: 617373272b9f44beb4c0de9b0f03f209 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.cpu.some.avg300)>{$CPU_SOME_AVG300_THRESH}' + name: 'Linux PSI - CPU Some Avg 300 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: ec632a2db14c40108806de311fa6c334 + name: 'IO Pressure Stall Information - Full - 10s Average' + type: DEPENDENT + key: psi_mth.io.full.avg10 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time all tasks were waiting on IO over the last 10s window.' + preprocessing: + - type: REGEX + parameters: + - 'full.*avg10=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/io]' + tags: + - tag: component + value: io + triggers: + - uuid: 123729eeddcc480191ed21ac6301fa41 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.io.full.avg10)>{$IO_FULL_AVG10_THRESH}' + name: 'Linux PSI - IO Full Avg 10 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: ce93bcb1f20249bd977f744f7bdd1d86 + name: 'IO Pressure Stall Information - Full - 60s Average' + type: DEPENDENT + key: psi_mth.io.full.avg60 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time all tasks were waiting on IO over the last 60s window.' 
+ preprocessing: + - type: REGEX + parameters: + - 'full.*avg60=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/io]' + tags: + - tag: component + value: io + triggers: + - uuid: edbc8baf45a640848aea4962caf96777 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.io.full.avg60)>{$IO_FULL_AVG60_THRESH}' + name: 'Linux PSI - IO Full Avg 60 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: 7d5bfc970af44a8aa8860f6cd7619566 + name: 'IO Pressure Stall Information - Full - 300s Average' + type: DEPENDENT + key: psi_mth.io.full.avg300 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time all tasks were waiting on IO over the last 300s window.' + preprocessing: + - type: REGEX + parameters: + - 'full.*avg300=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/io]' + tags: + - tag: component + value: io + triggers: + - uuid: ef4c3ac8f34341ff881309274b65e7ce + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.io.full.avg300)>{$IO_FULL_AVG300_THRESH}' + name: 'Linux PSI - IO Full Avg 300 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: e2f9e4d6495d4396af498710c320cb54 + name: 'IO Pressure Stall Information - Some - 10s Average' + type: DEPENDENT + key: psi_mth.io.some.avg10 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time some tasks were waiting on IO over the last 10s window.' 
+ preprocessing: + - type: REGEX + parameters: + - 'some.*avg10=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/io]' + tags: + - tag: component + value: io + triggers: + - uuid: cf228979864b4b4799047be32b461b2a + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.io.some.avg10)>{$IO_SOME_AVG10_THRESH}' + name: 'Linux PSI - IO Some Avg 10 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: d505c1681d7143c790cf05469380e179 + name: 'IO Pressure Stall Information - Some - 60s Average' + type: DEPENDENT + key: psi_mth.io.some.avg60 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time some tasks were waiting on IO over the last 60s window.' + preprocessing: + - type: REGEX + parameters: + - 'some.*avg60=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/io]' + tags: + - tag: component + value: io + triggers: + - uuid: 1c20d0e6c5c546abbd5739d8fc97bf92 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.io.some.avg60)>{$IO_SOME_AVG60_THRESH}' + name: 'Linux PSI - IO Some Avg 60 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: 14d3db630441450999ac833a1b5e24ad + name: 'IO Pressure Stall Information - Some - 300s Average' + type: DEPENDENT + key: psi_mth.io.some.avg300 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time some tasks were waiting on IO over the last 300s window.' 
+ preprocessing: + - type: REGEX + parameters: + - 'some.*avg300=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/io]' + tags: + - tag: component + value: io + triggers: + - uuid: fcf3054b6ff143a9b73cda347963b4c5 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.io.some.avg300)>{$IO_SOME_AVG300_THRESH}' + name: 'Linux PSI - IO Some Avg 300 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: 1ce303afbd974ca39f085ae3db02b6f5 + name: 'Memory Pressure Stall Information - Full - 10s Average' + type: DEPENDENT + key: psi_mth.memory.full.avg10 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time all tasks were waiting on Memory over the last 10s window.' + preprocessing: + - type: REGEX + parameters: + - 'full.*avg10=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/memory]' + tags: + - tag: component + value: memory + triggers: + - uuid: 1f81d338dca94d9cb5552cbd28c65ee7 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.memory.full.avg10)>{$MEMORY_FULL_AVG10_THRESH}' + name: 'Linux PSI - Memory Full Avg 10 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: b1df5a6b176e49e6b7786ac1416293b1 + name: 'Memory Pressure Stall Information - Full - 60s Average' + type: DEPENDENT + key: psi_mth.memory.full.avg60 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time all tasks were waiting on Memory over the last 60s window.' 
+ preprocessing: + - type: REGEX + parameters: + - 'full.*avg60=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/memory]' + tags: + - tag: component + value: memory + triggers: + - uuid: c92b50912d7448dc9724b273fe3e16d0 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.memory.full.avg60)>{$MEMORY_FULL_AVG60_THRESH}' + name: 'Linux PSI - Memory Full Avg 60 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: c37d28819c4f480986bda4d1c5539681 + name: 'Memory Pressure Stall Information - Full - 300s Average' + type: DEPENDENT + key: psi_mth.memory.full.avg300 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time all tasks were waiting on Memory over the last 300s window.' + preprocessing: + - type: REGEX + parameters: + - 'full.*avg300=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/memory]' + tags: + - tag: component + value: memory + triggers: + - uuid: d544b97f3b29468999690b5d49009631 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.memory.full.avg300)>{$MEMORY_FULL_AVG300_THRESH}' + name: 'Linux PSI - Memory Full Avg 300 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: 5573b30e93a7497fa6997f9cce19ef1b + name: 'Memory Pressure Stall Information - Some - 10s Average' + type: DEPENDENT + key: psi_mth.memory.some.avg10 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time tasks were waiting on Memory over the last 10s window.' 
+ preprocessing: + - type: REGEX + parameters: + - 'some.*avg10=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/memory]' + tags: + - tag: component + value: memory + triggers: + - uuid: 2e812e0a2ecf434399646cff51216e18 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.memory.some.avg10)>{$MEMORY_SOME_AVG10_THRESH}' + name: 'Linux PSI - Memory Some Avg 10 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: 5ecf814c22734002a070432f01ac0dc2 + name: 'Memory Pressure Stall Information - Some - 60s Average' + type: DEPENDENT + key: psi_mth.memory.some.avg60 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time tasks were waiting on Memory over the last 60s window.' + preprocessing: + - type: REGEX + parameters: + - 'some.*avg60=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/memory]' + tags: + - tag: component + value: memory + triggers: + - uuid: 6d72ba775b3c4d24a11b388196bbd1e4 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.memory.some.avg60)>{$MEMORY_SOME_AVG60_THRESH}' + name: 'Linux PSI - Memory Some Avg 60 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: e24d4a4e87de40b48214cad5a3c71732 + name: 'Memory Pressure Stall Information - Some - 300s Average' + type: DEPENDENT + key: psi_mth.memory.some.avg300 + delay: '0' + value_type: FLOAT + units: '%' + description: 'The percentage of time tasks were waiting on Memory over the last 300s window.' 
+ preprocessing: + - type: REGEX + parameters: + - 'some.*avg300=(\d+.\d+)' + - \1 + master_item: + key: 'vfs.file.contents[/proc/pressure/memory]' + tags: + - tag: component + value: memory + triggers: + - uuid: d7eddd3abf324002898918ddb52c74b6 + expression: 'last(/Linux Pressure Stall Information - PSI/psi_mth.memory.some.avg300)>{$MEMORY_SOME_AVG300_THRESH}' + name: 'Linux PSI - Memory Some Avg 300 - Exceeds Threshold' + priority: INFO + manual_close: 'YES' + - uuid: 17b344a3dae442c98149fab9437b4b40 + name: 'CPU Pressure Stall Information - Text' + key: 'vfs.file.contents[/proc/pressure/cpu]' + delay: 5s + trends: '0' + value_type: TEXT + description: 'Service item for gathering cpu ''some'' and ''full'' pressure (10s,60s,300s)' + tags: + - tag: component + value: cpu + - uuid: b78af20d636a4c8094f1161bc1518caf + name: 'IO Pressure Stall Information - Text' + key: 'vfs.file.contents[/proc/pressure/io]' + delay: 5s + trends: '0' + value_type: TEXT + description: 'Service item for gathering io ''some'' and ''full'' pressure (10s,60s,300s)' + tags: + - tag: component + value: io + - uuid: a36abe771ad5456981a0ae2d26569507 + name: 'Memory Pressure Stall Information - Text' + key: 'vfs.file.contents[/proc/pressure/memory]' + delay: 5s + trends: '0' + value_type: TEXT + description: 'Service item for gathering memory ''some'' and ''full'' pressure (10s,60s,300s)' + tags: + - tag: component + value: memory + tags: + - tag: accounting + value: pressure + - tag: data + value: raw + macros: + - macro: '{$CPU_FULL_AVG10_THRESH}' + value: '0' + description: 'CPU starvation for ALL processes over 10 seconds' + - macro: '{$CPU_FULL_AVG60_THRESH}' + value: '0' + description: 'CPU starvation for ALL processes over 60 seconds' + - macro: '{$CPU_FULL_AVG300_THRESH}' + value: '0' + description: 'CPU starvation for ALL processes over 300 seconds' + - macro: '{$CPU_SOME_AVG10_THRESH}' + value: '75' + description: 'CPU starvation for some processes over 10 seconds' + - macro: 
'{$CPU_SOME_AVG60_THRESH}' + value: '50' + description: 'CPU starvation for some processes over 60 seconds' + - macro: '{$CPU_SOME_AVG300_THRESH}' + value: '25' + description: 'CPU starvation for some processes over 300 seconds' + - macro: '{$IO_FULL_AVG10_THRESH}' + value: '10' + description: 'IO starvation for ALL processes over 10 seconds' + - macro: '{$IO_FULL_AVG60_THRESH}' + value: '5' + description: 'IO starvation for ALL processes over 60 seconds' + - macro: '{$IO_FULL_AVG300_THRESH}' + value: '1' + description: 'IO starvation for ALL processes over 300 seconds' + - macro: '{$IO_SOME_AVG10_THRESH}' + value: '50' + description: 'IO starvation for some processes over 10 seconds' + - macro: '{$IO_SOME_AVG60_THRESH}' + value: '10' + description: 'IO starvation for some processes over 60 seconds' + - macro: '{$IO_SOME_AVG300_THRESH}' + value: '5' + description: 'IO starvation for some processes over 300 seconds' + - macro: '{$MEMORY_FULL_AVG10_THRESH}' + value: '10' + description: 'Memory starvation for ALL processes over 10 seconds' + - macro: '{$MEMORY_FULL_AVG60_THRESH}' + value: '5' + description: 'Memory starvation for ALL processes over 60 seconds' + - macro: '{$MEMORY_FULL_AVG300_THRESH}' + value: '1' + description: 'Memory starvation for ALL processes over 300 seconds' + - macro: '{$MEMORY_SOME_AVG10_THRESH}' + value: '50' + description: 'Memory starvation for some processes over 10 seconds' + - macro: '{$MEMORY_SOME_AVG60_THRESH}' + value: '10' + description: 'Memory starvation for some processes over 60 seconds' + - macro: '{$MEMORY_SOME_AVG300_THRESH}' + value: '5' + description: 'Memory starvation for some processes over 300 seconds' + graphs: + - uuid: 05b092a42ddd41cb81edaac1c3783c97 + name: 'CPU Pressure Stall Information' + graph_items: + - drawtype: FILLED_REGION + color: 199C0D + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.cpu.full.avg10 + - sortorder: '1' + drawtype: FILLED_REGION + color: F63100 + 
calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.cpu.full.avg60 + - sortorder: '2' + drawtype: FILLED_REGION + color: 2774A4 + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.cpu.full.avg300 + - sortorder: '3' + color: F7941D + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.cpu.some.avg10 + - sortorder: '4' + color: FC6EA3 + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.cpu.some.avg60 + - sortorder: '5' + color: 6C59DC + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.cpu.some.avg300 + - uuid: b93898342c504cd5ac67190abe6d92eb + name: 'IO Pressure Stall Information' + graph_items: + - drawtype: FILLED_REGION + color: 199C0D + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.io.full.avg10 + - sortorder: '1' + drawtype: FILLED_REGION + color: F63100 + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.io.full.avg60 + - sortorder: '2' + drawtype: FILLED_REGION + color: 2774A4 + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.io.full.avg300 + - sortorder: '3' + color: F7941D + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.io.some.avg10 + - sortorder: '4' + color: FC6EA3 + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.io.some.avg60 + - sortorder: '5' + color: 6C59DC + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.io.some.avg300 + - uuid: 7cf1c476fbd943ba9bcf76ad9e738ccb + name: 'Memory Pressure Stall Information' + graph_items: + - drawtype: FILLED_REGION + color: 199C0D + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.memory.full.avg10 + - sortorder: '1' + drawtype: FILLED_REGION + color: F63100 + calc_fnc: ALL + 
item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.memory.full.avg60 + - sortorder: '2' + drawtype: FILLED_REGION + color: 2774A4 + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.memory.full.avg300 + - sortorder: '3' + color: F7941D + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.memory.some.avg10 + - sortorder: '4' + color: FC6EA3 + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.memory.some.avg60 + - sortorder: '5' + color: 6C59DC + calc_fnc: ALL + item: + host: 'Linux Pressure Stall Information - PSI' + key: psi_mth.memory.some.avg300