#!/bin/bash
#
# Copyright Hank Leininger
#           Jay Smith
#           KoreLogic, Inc

VERSION='$Id: check_dragon,v 1.13 2007/10/03 04:25:02 hlein Exp $'
VERSION=$(echo "$VERSION" | sed 's/.*,v //; s/ .*//;')

# This script runs some simple tests to see if the expected Dragon
# client/server processes are running (the EFP replicator, and the
# policy 'Dragon Rider' client).
# Suitable to be run from cron, or as a WebJob (http://webjob.sf.net).
#
# Currently checks dragonctl output for 'running' state, and
# netstat output for the expected ESTABLISHED sessions.  (You will
# have to update REPL_PORT_REGEX and/or DRIDER_PORT_REGEX if other
# than the default 9111 and 9112).  Those have been sufficient to
# catch the error-states we've seen most often on our sensors.
#
# Modify DRAGON_RC appropriately if your Dragon startup script is
# elsewhere (such as /etc/init.d/rc.dragon).
#
# State variables set by check_running (SENSOR_RUNNING, DRIDER_RUNNING,
# REPL_RUNNING):
#    1  the process is running
#    0  the process should be running but is not (triggers a restart)
#   -1  the process is not configured to run on this host

REPL_PORT_REGEX='911[24]'
DRIDER_PORT_REGEX='9111'

# Send one message to syslog under the check_dragon tag.
log_msg() {
  logger -t check_dragon "$*"
}

# Locate the Dragon startup script.
if [[ -x /etc/init.d/dragon ]]; then
  DRAGON_RC=/etc/init.d/dragon
elif [[ -x /etc/rc.d/rc.dragon ]]; then
  DRAGON_RC=/etc/rc.d/rc.dragon
else
  log_msg "Cannot find the Dragon RC script"
  exit 1
fi

# Locate the Dragon install directory.
if [[ -d /var/dragon ]]; then
  DRAGON_PATH=/var/dragon
elif [[ -d /opt/dragon ]]; then
  DRAGON_PATH=/opt/dragon
elif [[ -d /usr/dragon ]]; then
  DRAGON_PATH=/usr/dragon
else
  log_msg "Cannot find Dragon install directory"
  exit 1
fi

# We only know how to work on dragon 5-6 and 7
if [[ -f "${DRAGON_PATH}/dragon.cfg" ]]; then
  DRAGON_VERSION=6
  DRAGONCTL_PATH=${DRAGON_PATH}
elif [[ -f "${DRAGON_PATH}/conf/net-cfg-client.xml" ]]; then
  DRAGON_VERSION=7
  DRAGONCTL_PATH=${DRAGON_PATH}/bin
else
  # Unknown layout: nothing we can check, so leave quietly.
  exit 0
fi

# Stop Dragon via the RC script, force-kill leftovers, and start it again
# once check_running reports that nothing is left running.
# Arguments: $1 - optional name used in the log message (default: dragon)
# Globals:   DRAGON_RC (read); state variables are refreshed via check_running
# Exits the script with status 1 if the old processes survive six kill rounds.
restart_dragon() {
  local proc=${1:-dragon}
  local attempt

  log_msg "restarting $proc"

  if [[ ! -x "$DRAGON_RC" ]]; then
    log_msg "I want to restart dragon, but no valid RC script $DRAGON_RC"
    sleep 5
    return
  fi

  "$DRAGON_RC" stop
  for attempt in 1 2 3 4 5 6; do
    sleep 10
    # Dragon sensor processes sometimes don't respect graceful kills
    killall -9 dragon 2>/dev/null && sleep 5
    # make sure things have shut down before we try to start them again
    check_running
    # <= 0 covers both "not running" (0) and "not configured" (-1).
    if (( SENSOR_RUNNING <= 0 && DRIDER_RUNNING <= 0 && REPL_RUNNING <= 0 )); then
      "$DRAGON_RC" start
      return
    fi
    killall -9 dragonctl dragon net-cfg-client net-event-channel 2>/dev/null
    sleep 10
  done

  log_msg "Old Dragon process will not die, cannot restart"
  exit 1
}

# Refresh SENSOR_RUNNING, DRIDER_RUNNING and REPL_RUNNING from dragonctl
# output, the process table, and netstat.
# Must be called with the current directory set to DRAGONCTL_PATH, because
# dragonctl is invoked as ./dragonctl.
# Globals: DRAGON_PATH, DRAGON_VERSION, REPL_PORT_REGEX, DRIDER_PORT_REGEX (read)
#          SENSOR_RUNNING, DRIDER_RUNNING, REPL_RUNNING (written)
check_running() {
  local ctl_output established

  DRIDER_RUNNING=0
  REPL_RUNNING=0
  SENSOR_RUNNING=0

  ctl_output=$(./dragonctl 2>&1)

  if grep -Eq '(Replicator[0-9]*|EventChannel) *running' <<<"$ctl_output"; then
    REPL_RUNNING=1
  fi
  if grep -Eq '(Dragon|Network)Sensor *running' <<<"$ctl_output"; then
    SENSOR_RUNNING=1
  fi

  # pidof should report exactly one PID even for multithreaded processes
  if (( $(pidof dragon | wc -w) != 1 )); then
    # There's either zero, or more than one instance of the dragon sensor
    # running.  Either case is bad; restart_dragon can fix it.
    SENSOR_RUNNING=0
  fi

  # Dragon 5-6 show 'DragonRider' in the dragonctl output
  if grep -Eq 'DragonRiderSensor *running' <<<"$ctl_output"; then
    DRIDER_RUNNING=1
  # For Dragon 7 we must check the process table
  # N.B. tweak the ps options if you have a sysv ps (i.e. Solaris)
  # The [e] bracket keeps this grep from matching its own command line.
  elif ps auxwww | grep -Eq './n[e]t-cfg-client'; then
    DRIDER_RUNNING=1
  fi

  # A process that claims to be running must also hold an ESTABLISHED
  # session on its expected port; otherwise treat it as down.
  established=$(netstat -an \
    | grep -E "[.:]($REPL_PORT_REGEX|$DRIDER_PORT_REGEX) +[0-9.:]* *EST")

  if [[ "$REPL_RUNNING" == "1" ]]; then
    grep -Eq "[.:]$REPL_PORT_REGEX +" <<<"$established" || REPL_RUNNING=0
  fi
  if [[ "$DRIDER_RUNNING" == "1" ]]; then
    grep -Eq "[.:]$DRIDER_PORT_REGEX +" <<<"$established" || DRIDER_RUNNING=0
  fi

  # sanity check: are these *supposed* to be running?
  # If not, mark the state -1: the top-level checks (which look for 0) treat
  # it as OK, and restart_dragon (which waits for <= 0) treats it as stopped.
  if [[ "$DRAGON_VERSION" == 6 ]]; then
    grep -Eq 'process Replicator' "${DRAGON_PATH}/dragon.cfg" \
      || REPL_RUNNING=-1
    grep -Eq 'process DragonRiderSensor' "${DRAGON_PATH}/dragon.cfg" \
      || DRIDER_RUNNING=-1
    grep -Eq 'process DragonSensor' "${DRAGON_PATH}/dragon.cfg" \
      || SENSOR_RUNNING=-1
  elif [[ -f "${DRAGON_PATH}/conf/host.xml" ]]; then
    grep -Eiq 'Process name="EventChannel".*enable="y"' \
      "${DRAGON_PATH}/conf/host.xml" || REPL_RUNNING=-1
    grep -Eiq 'Process name="NetworkSensor".*enable="y"' \
      "${DRAGON_PATH}/conf/host.xml" || SENSOR_RUNNING=-1
  fi
}

# Restart Dragon when both dragonctl.log and NetworkSensor.log have gone
# unmodified for longer than max_age seconds.
# Returns without acting if either log is missing, if host.xml exists but
# does not enable the NetworkSensor, or if a modification time cannot be read.
# Globals: DRAGON_PATH (read)
check_timestamps() {
  local max_age=630
  local ctl_log=${DRAGON_PATH}/logs/dragonctl.log
  local sensor_log=${DRAGON_PATH}/logs/NetworkSensor.log
  local host_xml=${DRAGON_PATH}/conf/host.xml
  local now ctl_mtime sensor_mtime

  # We can't take a timestamp from a file that doesn't exist...
  if [[ ! -f "$ctl_log" || ! -f "$sensor_log" ]]; then
    return
  fi

  # Make sure this box is supposed to be running the NetworkSensor
  if [[ -f "$host_xml" ]]; then
    grep -Eiq 'Process name="NetworkSensor".*enable="y"' "$host_xml" || return
  fi

  now=$(date +%s)
  # %Y is the modification time in seconds since the epoch.
  ctl_mtime=$(stat -c %Y "$ctl_log" 2>/dev/null) || return
  sensor_mtime=$(stat -c %Y "$sensor_log" 2>/dev/null) || return

  if (( now - ctl_mtime > max_age && now - sensor_mtime > max_age )); then
    log_msg "Dragon does not appear to be alerting. Restarting dragon."
    restart_dragon
    sleep 30
  fi
}

# check_running invokes ./dragonctl, so a failed cd would make every check
# report a false "not running".
cd "$DRAGONCTL_PATH" || {
  log_msg "Cannot cd to $DRAGONCTL_PATH"
  exit 1
}

check_running

# More often than not, if something has gone wrong, the
# shared memory segment is corrupt, so all of Dragon should
# be restarted; restarting just one piece is pointless.
if [[ "$SENSOR_RUNNING" == "0" \
   || "$DRIDER_RUNNING" == "0" \
   || "$REPL_RUNNING" == "0" ]]; then
  restart_dragon
  sleep 30
fi

# A condition can result where dragon appears to be running
# but not actually alerting.  We check the timestamps of
# NetworkSensor.log and dragonctl.log to see if they are
# older than 10 mins.  If they are, then we need to restart.
if [[ "$DRAGON_VERSION" == 7 ]]; then
  check_timestamps
fi

# Final verdict: report anything that is still down after the restart attempts.
check_running

if [[ "$DRIDER_RUNNING" == "0" ]]; then
  log_msg 'tried and failed to restart the Policy client!'
fi
if [[ "$REPL_RUNNING" == "0" ]]; then
  log_msg 'tried and failed to restart the Event client!'
fi
if [[ "$SENSOR_RUNNING" == "0" ]]; then
  log_msg 'tried and failed to restart the Sensor!'
fi