Skip to content

Commit

Permalink
Merge pull request #2 from sawft99/AddUptimeScript
Browse files Browse the repository at this point in the history
Add uptime script
  • Loading branch information
sawft99 authored Jun 13, 2024
2 parents 98ef440 + c9937a2 commit 15680de
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 2 deletions.
2 changes: 1 addition & 1 deletion CheckWinLocalAccounts.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ if ($ShouldNotBeAdmins.count -gt 0) {
$Output = 'Accounts: '
$Output += $MissingLocalAccounts.Name -join ', '
Write-Output $Output
$LASTEXITCODE
$LASTEXITCODE = 1
} elseif ($ShouldNotBeEnabledAccounts.count -gt 0) {
Write-Output 'WARNING: Some accounts that should NOT be enabled are'
$Output = 'Accounts: '
Expand Down
29 changes: 28 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Various Nagios plugins
### Example

- `check_ncpa.py -t 'TOKEN' -P 5693 -M 'plugins/CheckScriptCertExperation.ps1/14/7'`
- This will send a warning alert when a signature is about to expire in 14 or less days and then change to critical when it expires in 7 or less days
- This will send a warning alert when a signature is about to expire in 14 or less days and then change to critical when it expires in 7 or less days

## [CheckWinLocalAccounts.ps1](./CheckWinLocalAccounts.ps1)

Expand All @@ -40,3 +40,30 @@ Various Nagios plugins
### Example

- `check_ncpa.py -t 'TOKEN' -P 5693 -M 'plugins/CheckWinLocalAccounts.ps1'`

## [ShutdownAndUptime.ps1](./ShutdownAndUptime.ps1)

- Checks if server has been up for X amount of time
- Measured in total hours i.e. 3 days = 72 hours
- Variables can specify critical threshold, warning threshold and how far to look back in the event log
- Also measured in total hours
- Will also return
- A CRITICAL value if any Error or Critical level event is found
- A WARNING value if any Warning level event is found
- Errors detected (Non info) in events will override uptime settings
- See example
- **<ins>Currently not on Nagios Exchange</ins>**

### Arguments

- WARNING: Threshold for the minimum amount of time the server has to be up
- CRITICAL: Threshold for more recently rebooted servers
- EventAge: How far back to look in the event log for reboot events

### Example

- `check_ncpa.py -t 'TOKEN' -P 5693 -M 'plugins/ShutdownAndUptime.ps1/24/12/24'`
- Warning if the server has been up for less than 24 hours, Critical if it has been up for less than 12, look back 24 hours in the event log for reboot events
- `check_ncpa.py -t 'TOKEN' -P 5693 -M 'plugins/ShutdownAndUptime.ps1/6/4/24'`
- Warning if the server has been up for less than 6 hours, Critical if it has been up for less than 4, look back 24 hours in the event log for reboot events
- With the EventAge variable being larger than the uptime thresholds, it will report Critical, Error, & Warning level events with the appropriate exit codes even if uptime exceeds 6 or 4 hours
137 changes: 137 additions & 0 deletions ShutdownAndUptime.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#Checks for shutdown events and uptime

#Reference
#Event Log ID | Meaning
#41 | The system has rebooted without cleanly shutting down first
#1074 | The system has been shutdown properly by a user or process
#1076 | Follows after Event ID 6008 and means that the first user with shutdown privileges logged on to the server after an unexpected restart or shutdown and specified the cause
#6005 | The Event Log service was started. Indicates the system startup
#6006 | The Event Log service was stopped. Indicates the proper system shutdown
#6008 | The previous system shutdown was unexpected
#6009 | The operating system version detected at the system startup
#6013 | The system uptime in seconds

#Thresholds are read from the positional arguments and are all expressed in total hours
#An argument that is left out casts to 0

#-----------------

#Validate input
#The casts run inside try/catch so that only a failed conversion reports UNKNOWN
#Checking the session-wide $Error list instead would also trip on unrelated earlier errors, e.g. from a profile script
try {
    [int32]$WarningUptime = $args[0] #Uptime in total hours. Example, if you want 3 days enter 72
    [int32]$CriticalUptime = $args[1] #Uptime in total hours.
    [int32]$MaxEventAge = $args[2] #In total hours. Don't look back more than X hours in the event log
} catch {
    #3 is the exit code this script uses for UNKNOWN
    $LASTEXITCODE = 3
    Write-Output 'UNKNOWN: Only use Int32 numbers for the arguments'
    exit $LASTEXITCODE
}

#Event Log IDs that record a reboot or shutdown (see the reference table at the top of the script)
$EventIDs = 41, 1074, 1076, 6005, 6006, 6008

#Measure Uptime
#Uptime is the difference between now and the boot time reported by Win32_OperatingSystem
$CurrentTime = Get-Date
$OperatingSystem = Get-CimInstance -ClassName Win32_OperatingSystem
$LastBoot = $OperatingSystem.LastBootUpTime
$ActualUptime = $CurrentTime - $LastBoot
#Re-declare the variable as [int32] so the TimeSpan is reduced to a whole number of hours
[int32]$ActualUptime = $ActualUptime.TotalHours
#$ActualUptime = (Get-Uptime).TotalHours #If PS7
$UptimeMessage = "Server has been up for $ActualUptime hours"

#Get events
#The log name, event IDs and maximum age are handed to Get-WinEvent as a filter,
#so the whole System log no longer has to be read and piped through Where-Object
$EventFilter = @{
    LogName   = 'System'
    Id        = $EventIDs
    StartTime = $CurrentTime.AddHours(-$MaxEventAge)
}
#Get-WinEvent raises an error when nothing matches the filter
#Finding no reboot events is a normal result for this check, so that error is silenced
$Events = Get-WinEvent -FilterHashtable $EventFilter -Oldest -ErrorAction SilentlyContinue
#Expand every property so the reformat step can read them from plain objects
$EventsFiltered = $Events | Select-Object -Property *

#Reformat event info

#Test for Get-ADUser abilities once, instead of once per event. Will report the AD user if able
$ADUserAbility = (Get-Command Get-ADUser -ErrorAction SilentlyContinue).count -gt 0

#Translates the UserId (SID) recorded on an event into a display name
#  UserId     : SID object or SID string taken from the event, may be $null
#  CanQueryAD : $true when Get-ADUser can be used to resolve SIDs that are not well known
#Returns a string
function Resolve-EventUser {
    param(
        $UserId,
        [bool]$CanQueryAD = $false
    )

    if ($null -eq $UserId) {
        return 'None/Unknown'
    }
    $Sid = $UserId.ToString()

    #Determine SID type from the first sub authority after the S-1-5- prefix
    #Values 0-20 are treated as well known local identities, anything else as an AD account
    #A regex is used because TrimStart() strips a set of characters rather than a prefix,
    #which would also remove leading 1s and 5s from the value being tested
    $SIDTest1 = $false
    if ($Sid -match '^S-1-5-(\d+)') {
        $SIDTest1 = [int64]$Matches[1] -in 0..20
    }

    if ($SIDTest1 -eq $false) {
        if ($CanQueryAD) {
            #A failed lookup must not abort the check, fall through to the raw SID instead
            try {
                $Account = (Get-ADUser -Identity $Sid -ErrorAction Stop).SamAccountName
            } catch {
                $Account = $null
            }
            if ($Account) {
                return $Account
            }
        }
        return $Sid + ' (Undetermined AD account)'
    }

    switch ($Sid) {
        'S-1-5-19' { return 'NT Authority (LocalService)' }
        'S-1-5-18' { return 'System' }
        default { return $Sid + ' (Undetermined local account)' }
    }
}

#Build one flat object per event holding only the fields that get reported
$EventsReformat = foreach ($EventLog in $EventsFiltered) {
    [PSCustomObject]@{
        Time         = $EventLog.TimeCreated
        ID           = $EventLog.Id
        Level        = $EventLog.LevelDisplayName
        Container    = $EventLog.ContainerLog
        ProviderName = $EventLog.ProviderName
        Log          = $EventLog.LogName
        User         = Resolve-EventUser -UserId $EventLog.UserId -CanQueryAD $ADUserAbility
        Message      = $EventLog.Message
    }
}

#Get counts of event level types
#Every result is wrapped in @() so that a single matching event still gives a Count of 1
#Windows PowerShell 5.1 does not expose Count on a lone PSCustomObject, so without the wrapper
#exactly one Critical, Error or Warning event would be counted as none
$CriticalCount = @($EventsReformat | Where-Object -Property Level -EQ 'Critical').Count
$ErrorCount = @($EventsReformat | Where-Object -Property Level -EQ 'Error').Count
$WarningCount = @($EventsReformat | Where-Object -Property Level -EQ 'Warning').Count
$InfoCount = @($EventsReformat | Where-Object -Property Level -EQ 'Information').Count
#Total of the four levels above. Events with any other level are not included
$RebootEventCount = $CriticalCount + $ErrorCount + $WarningCount + $InfoCount

#Set exit depending on types of events found
#The first condition that holds decides the code: 2 for Critical/Error, 1 for Warning,
#0 for Information only or no events at all, 3 when none of the conditions apply
$LASTEXITCODE = switch ($true) {
    { ($CriticalCount -gt 0) -or ($ErrorCount -gt 0) } { 2; break }
    { $WarningCount -gt 0 } { 1; break }
    { $InfoCount -gt 0 } { 0; break }
    { $RebootEventCount -lt 1 } { 0; break }
    default { 3 }
}

#Adjust exit code based on uptime
#Below the warning threshold the result is at least 1
#It becomes 2 when uptime is also below the critical threshold or a Critical/Error event was found
#At or above the warning threshold the code chosen from the events is left as it is
if ($ActualUptime -lt $WarningUptime) {
    $IsCritical = ($ActualUptime -lt $CriticalUptime) -or ($CriticalCount -gt 0) -or ($ErrorCount -gt 0)
    if ($IsCritical) {
        $LASTEXITCODE = 2
    } else {
        $LASTEXITCODE = 1
    }
} elseif (-not ($ActualUptime -ge $WarningUptime)) {
    #Safety net for values that cannot be compared either way
    $LASTEXITCODE = 3
}

#Output
$UptimeMessage = $UptimeMessage + " with $RebootEventCount reboot events in the past $MaxEventAge hours"
#Prefix the message with the status word that matches the exit code
#The prefix is stored back into the message so the status line is written once, not twice
if ($LASTEXITCODE -eq 0) {
    $UptimeMessage = 'OK: ' + $UptimeMessage
} elseif ($LASTEXITCODE -eq 1) {
    $UptimeMessage = 'WARNING: ' + $UptimeMessage
} elseif ($LASTEXITCODE -eq 2) {
    $UptimeMessage = 'CRITICAL: ' + $UptimeMessage
} else {
    $UptimeMessage = 'UNKNOWN: ' + $UptimeMessage
}
Write-Output $UptimeMessage
Write-Output "Warning trigger: $WarningUptime hours"
Write-Output "Critical trigger: $CriticalUptime hours"
Write-Output "Max event age: $MaxEventAge hours"
#Measure-Object is used for the count so that a single event is still listed
#Windows PowerShell 5.1 does not expose Count on a lone PSCustomObject
if (($EventsReformat | Measure-Object).Count -gt 0) {
    Write-Output '
=====
Events
====='
    Write-Output $EventsReformat
}
exit $LASTEXITCODE

0 comments on commit 15680de

Please sign in to comment.