Can someone please help how can I add Nagios logic to catch alerts to my below python script?
I tried adding the sys.exit(0) and sys.exit(1) for all OK and CRITICAL, Or Please Let me know what I should do, So that this script when run Nagios catch the 0,1,2 and display the message.
#!/usr/bin/python
import subprocess
import os, sys
#Check python present or not
# dnf install python3.6-stack
# export PATH=/opt/python-3.6/bin:$PATH
def check_MegaRaid():
# Next script
failed=subprocess.run(["sudo /opt/MegaRAID/MegaCli/MegaCli64 -AdpAllInfo \ -aALL | grep -i 'Failed Disks' | awk -F':' '{print $2}'"], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
failed_status = failed.stdout
print("failed_status is",failed_status)
critical=subprocess.run(["sudo /opt/MegaRAID/MegaCli/MegaCli64 -AdpAllInfo \ -aALL | grep -i 'Critical Disks' | awk -F':' '{print $2}'"], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
critical_status = critical.stdout
print("critical_status is",critical_status)
if failed_status.strip() and critical_status.strip() == "0" :
print("Raid check all OK" )
sys.exit(0)
#return 0
else:
print("CRITICAL")
sys.exit(1)
#return 1
def check_raid():
process=subprocess.run(["sudo /sbin/mdadm --detail /dev/md127 | grep -i state | grep -w clean, | awk -F',' '{print $2}' |sed -e 's/^[ \t]*//' "], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
output = process.stdout
check_process=subprocess.run(["sudo /sbin/mdadm --detail /dev/md127 | grep -i state | awk -F':' '{print $2}' |sed -e 's/^[ \t]*//' "], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
check = check_process.stdout
if output.strip() == 'degraded':
print("Raid disk state is CRITICAL ",output)
#return 1
sys.exit(1)
elif check.strip() == 'clean':
print("Raid check all OK")
#return 0
sys.exit(0)
else:
print("sudo /sbin/mdadm --detail /dev/md127 cmd not found : This is an dataraid machine")
check_MegaRaid()
#Check whether system configure raid
process=subprocess.run(["sudo cat /GEO_VERSION | grep -i raid | awk -F'Layout:' '{print $2}' | sed 's/[0-9]*//g' | sed -e 's/^[ \t]*//'"], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
raid_value = process.stdout
if raid_value.strip() == 'raid':
print("System configure Raid functions")
check_raid()
else:
print("There is no raid configured in this system")
exit()
Referencing https://nagios-plugins.org/doc/guidelines.html in case you're interested.
0 is OK 1 is Warning 2 is Critical 3 is Unknown
So the first thing you need to do is replace your
sys.exit(1)
with asys.exit(2)
I would also replace that final
exit()
with asys.exit(3)
to signal that it is an Unknown exit, which will you help you identify mis-configured services in the UI.You'll also want to indicate the status first, a typical one-line plugin output will look like:
But it doesn't look like you're using performance data, so change your critical exits to be prepended with the characters
CRITICAL:
and your OK statuses withOK:
.