Hello, I am setting up Monit to monitor our servers. Please check my monitrc configuration file. For some reason it starts failing at the `check filesystem` rule, which I have highlighted below. If I comment out that whole `check filesystem` section, the error simply moves on to the next rules further down. I am not sure how to troubleshoot this error, and it has been really frustrating. Any help is appreciated.
# Global daemon settings: poll interval, log file, and the id/state file locations.
set daemon 120 # check services at 2-minute intervals
set logfile /var/log/monit.log
set idfile /var/lib/monit/id
set statefile /var/lib/monit/state
# NOTE(review): the next line is not a complete statement on its own. It looks like
# the tail of a `set mailserver <host> ...` statement whose first part is missing
# here (possibly removed before posting) -- confirm against the real monitrc.
using tlsv1 with timeout 30 seconds
# Event queue: base directory /var/lib/monit/events, limited to 100 slots.
set eventqueue
basedir /var/lib/monit/events
slots 100
# Template for alert e-mails: a subject line plus a message body built from
# $EVENT, $SERVICE, $DATE, $ACTION, $HOST and $DESCRIPTION placeholders
# (presumably substituted by Monit when the mail is sent -- confirm).
# No comments are placed inside the braces so the template text stays untouched.
set mail-format {
subject: monit alert -- $EVENT $SERVICE
message: $EVENT Service $SERVICE
Received:
Date: $DATE
Action: $ACTION
Host: $HOST
Description: $DESCRIPTION
}
# Built-in web interface: served on port 8181 and bound to localhost.
set httpd port 8181 and
    use address localhost
    allow admin:monit    # username:password for web access
###############################################################################
## Services
###############################################################################
# Host-wide resource thresholds. Every rule in this stanza only raises an alert.
check system localhost
    # Load average
    if loadavg (1min) > 4 then alert
    if loadavg (5min) > 2 then alert
    # Memory and swap
    if memory usage > 75% then alert
    if swap usage > 25% then alert
    # CPU, split by user / system / I/O wait
    if cpu usage (user) > 70% then alert
    if cpu usage (system) > 30% then alert
    if cpu usage (wait) > 20% then alert
# Apache binary: verify permissions (755), owner (UID root) and group (GID root).
# Any mismatch stops monitoring of this file (unmonitor).
# NOTE: no checksum test is configured in this stanza.
check file apache_bin with path /usr/lib/apache2/mpm-prefork/apache2
    if failed permission 755 then unmonitor
    if failed uid root then unmonitor
    if failed gid root then unmonitor
    group web
# Apache service: liveness via pidfile and an HTTP probe on port 80, plus
# resource-usage limits.
check process apache with pidfile /var/run/apache2.pid
    start program = "/etc/init.d/apache2 start" with timeout 20 seconds
    stop program = "/etc/init.d/apache2 stop"
    # Child-process count: alert at 150, restart at 250.
    if children > 150 then alert
    if children > 250 then restart
    # Sustained load or CPU pressure.
    if loadavg(5min) greater than 15 for 8 cycles then stop
    if totalcpu > 50% for 2 cycles then alert
    if totalcpu > 80% for 5 cycles then restart
    # HTTP probe against the local listener.
    if failed host 127.0.0.1 port 80 protocol http
        then restart
    # Give up after repeated restarts. The rule takes exactly one action, so the
    # former trailing "and alert" was removed: "and" is a noise word, which left
    # a stray `alert` keyword that starts a separate `alert <address>` statement
    # and produced a syntax error at the next token.
    # NOTE(review): the timeout is expected to raise its own alert event --
    # confirm that alert recipients are configured (none are visible here).
    if 5 restarts within 5 cycles then timeout
    group web
# Check ROOT filesystem space usage  <-- The syntax error is reported starting at the `check filesystem` rule below.
check filesystem rootfs with path /
    # Alert when usage exceeds 80% in 5 of the last 15 cycles.
    if space usage > 80% for 5 times within 15 cycles then alert
    group server
# DATA filesystem (/dev/xvda1): disk-space usage.
check filesystem datafs with path /dev/xvda1
    # Alert when usage exceeds 80% in 5 of the last 15 cycles.
    if space usage > 80% for 5 times within 15 cycles then alert
    group server
# MySQL service: liveness via pidfile and a MySQL-protocol probe on port 3306.
# NOTE: no `depends on` relationship is configured in this stanza.
check process mysqld with pidfile /var/run/mysqld/mysqld.pid
    group database
    start program = "/etc/init.d/mysql start" with timeout 20 seconds
    stop program = "/etc/init.d/mysql stop"
    if failed host 127.0.0.1 port 3306 protocol mysql then restart
    # Give up after repeated restarts. The rule takes exactly one action, so the
    # former trailing "and alert" was removed: it left a stray `alert` keyword
    # that starts a separate `alert <address>` statement and produced a syntax
    # error at the next token.
    if 5 restarts within 5 cycles then timeout
# Tomcat 7 service: liveness via pidfile and an HTTP probe on port 8080.
check process tomcat7 with pidfile /var/run/tomcat7.pid
    start program = "/etc/init.d/tomcat7 start" with timeout 60 seconds
    stop program = "/etc/init.d/tomcat7 stop"
    if failed host 127.0.0.1 port 8080 protocol http then restart
    # Give up after repeated restarts. The rule takes exactly one action, so the
    # former trailing "and alert" was removed: it left a stray `alert` keyword
    # that starts a separate `alert <address>` statement and produced a syntax
    # error at the next token.
    if 5 restarts within 5 cycles then timeout
    group web
# MongoDB service: uses mongod.lock as the pidfile and probes port 27017.
check process mongodb with pidfile /var/lib/mongodb/mongod.lock
    group database
    start program = "/etc/init.d/mongodb start" with timeout 20 seconds
    stop program = "/etc/init.d/mongodb stop"
    # NOTE(review): the probe speaks HTTP to port 27017 -- confirm that the
    # installed MongoDB version answers HTTP requests on this port.
    if failed host 127.0.0.1 port 27017 protocol http for 3 times within 5 cycles then restart
    # Give up after repeated restarts. The rule takes exactly one action, so the
    # former trailing "and alert" was removed: it left a stray `alert` keyword
    # with no mail address after it, which is a syntax error.
    if 5 restarts within 5 cycles then timeout