#!/bin/perl -w
#+
# testAll -- check functionality of all DEIMOS systems
#
# Purpose:
#	Test all of the DEIMOS systems to determine whether they are
#	working properly.  The following checks are made:
#	- Check that all daemons are running
#	- Check all keyword libraries (tests infopatcher)
#	- Check that science/FCS CCD crate is alive via 'ping'
#	- Check that science/FCS CCD keywords are readable
#	- Check communication with Lantronix
#	- Check communication with Galil controllers
#	- Check that guider is powered on
#	- Check that various instrument stages will report back
#	  positions via 'show' command
#
# Usage:
#	testAll [systems]
# 
# Arguments:
#	systems = [computers daemons apps keywords power settings]
#		Systems which should be tested.  Default is all of them.
# 
# Output:
#	Report written to stdout
# 
# Exit values:
#	 0 = normal completion
#	<0 = warnings but no errors
#	>0 = error
#
# Example:
#	1) To check instrument status:
#		testAll
#
# Nominal output:
#       Checking DEIMOS computers:
#         Checking deits2..................OK (Lantronix - cradle)
#         Checking deits3..................OK (Lantronix - barrel)
#         Checking deivmep.................OK (Science CCD crate)
#         Checking fcsvmep.................OK (FCS CCD crate)
#         Checking keamanop................OK (supervisory computer - Sun)
#         Checking rotop...................OK (rotator control - Linux box)
#       Checking DEIMOS daemons:
#         Checking deirot.cache............OK
#         Checking deirot.dispatcher.......OK
#         Checking deirot.watchdcs.........OK
#         Checking dispatcher.barco........OK
#         Checking dispatcher.bargun.......OK
#         Checking dispatcher.dinfo........OK
#         Checking dispatcher.hplog........OK
#         Checking dispatcher.piezo........OK
#         Checking dispatcher2.1...........OK
#         Checking dispatcher2.2...........OK
#         Checking dremel..................OK
#         Checking lickserv2...............OK
#         Checking monitor.deifcs..........OK
#         Checking monitor.deimos..........OK
#         Checking monitor.deirot..........OK
#         Checking traffic.................OK
#         Checking watch_ccd...............OK
#       Checking DEIMOS applications:
#         Checking write_image.............OK
#         Checking fcstrack................OK
#         Checking ds9relay................OK
#         Checking lickserv................OK
#       Checking DEIMOS keyword libraries:
#         Checking ACS.....................OK
#         Checking CCD+infopatcher.........OK
#         Checking DCS.....................OK
#         Checking FCS+infopatcher.........OK
#         Checking bargun..................OK
#         Checking dispatcher 1............OK
#         Checking dispatcher 2............OK
#         Checking hplogger................OK
#         Checking piezo...................OK
#         Checking rotator.................OK
#       Checking DEIMOS settings:
#         Checking dewar ion pump..........OK 
#         Checking Rotator CW limit........OK 
#         Checking vessel ion pump.........OK 
#         Checking FCS lamp................OK 
#         Checking Science CCD 15V Power...OK 
#         Checking FCS focus tolerance 1...OK 
#         Checking Science CCD 30V Power...OK 
#         Checking FCS focus tolerance 2...OK 
#         Checking CCD temp setpoint.......OK 
#         Checking CCD temperature.........OK 
#         Checking FCS CCD 15V Power.......OK 
#         Checking FCS CCD 30V Power.......OK 
#         Checking Current instrument......OK 
#         Checking Rotator CCW limit.......OK 
#       ---------------------------------------------------------------
#             All tested DEIMOS systems tested appear functional.
#       ---------------------------------------------------------------
#-
# Modification history:
#	2004-Mar-03	GDW	Original version
#	2004-Dec-08	GDW	Added check for TEMPSET
#
# TBD:
#	sanity checks for FCS
#-----------------------------------------------------------------------

use strict;

# misc definitions...
# Status strings printed after each check.  They are package globals and
# are also used by the subroutines at the end of this file.
($X::good, $X::warning, $X::error) = ("OK", "WARNING!", "ERROR!");
my( $n_errors) = 0; # number of errors
my( $n_warnings) = 0; # number of warnings
my( $key, $value);
my( $type, $pid);
my( $daemon_name);
my( $computer);
my( @command);
my( $system);
my( $command);
my( $state, $message);

# define all possible systems to test...
# NOTE: "power" is accepted on the command line, but no section of this
# script tests $check{"power"}, so selecting it performs no checks.
my( @systems) = qw( computers daemons apps keywords power settings);

# default state for each system is "off"...
my( %check);
foreach $system ( @systems ) {
  $check{$system} = 0
}

# define usage...
# Reduce $0 to the bare program name: keep only the text after the LAST
# slash (the pattern is anchored at the end and the capture excludes "/").
my( $cmd) = $0;
if ( $cmd =~ m|/([^/]+)$| ) { $cmd = $1 };
my( $usage) = "Usage: $cmd [" . join( " ", @systems) . "]";

# default: if no args are specified, check all systems...
if ( @ARGV < 1 ) {
  foreach $system ( @systems ) {
    $check{$system} = 1
  }
}

# if args are specified, turn on certain systems; any name which is not
# in @systems aborts with the usage message...
while ( @ARGV ) {

  $system = shift;

  if( exists( $check{$system})){
    $check{$system} = 1
  } else {
    die "$usage\n"
  }
}

# verify host...
# HOST is not exported by every shell, so fall back to asking the system
# for its name rather than comparing against an undefined value.
my( $host) = $ENV{'HOST'};
unless( defined($host) and length($host) ){
  require Sys::Hostname;
  $host = eval { Sys::Hostname::hostname() };
  $host = '' unless defined($host);
}
if( $host !~ /^polo/ ){
  die "this command must be run on polo\n"
}

# who am I?
# if( $ENV{'USER'} !~ /^dmoseng/ ){
#   die "this command must be run by dmoseng account; you are $ENV{'USER'}\n"
# }

#----------------------------------------
# Check computers
#----------------------------------------
# Each host in the table is pinged once; a ping command that exits
# non-zero is reported as an error and counted in $n_errors.
if ( $check{"computers"} ) {

  print "Checking DEIMOS computers:\n";

  # host name => description printed in parentheses after the result
  my %description_of = (
    'keamanop' => 'supervisory computer - Sun',
    'rotop'    => 'rotator control - Linux box',
    'deits2'   => 'Lantronix - cradle',
    'deits3'   => 'Lantronix - barrel',
    'deivmep'  => 'Science CCD crate',
    'fcsvmep'  => 'FCS CCD crate',
  );

  foreach my $host ( sort keys %description_of ) {

    # label first, then the (possibly slow) ping, then the verdict
    print &labelize("Checking $host");
    my $ping_failed = ( system( "ping $host 1 > /dev/null") != 0 );

    my $verdict = $ping_failed ? $X::error : $X::good;
    print "$verdict (" . $description_of{$host} . ")\n";
    $n_errors++ if $ping_failed;
  }
}

#----------------------------------------
# Check daemons
#----------------------------------------
# Every daemon starts out marked as an error; the status routines then
# upgrade the ones they find running.  Whatever is still marked as an
# error at the end is counted in $n_errors.

if ( $check{"daemons"} ) {

  print "Checking DEIMOS daemons:\n";

  # translation from daemon name to the corresponding string in
  # "deimos status daemons" output...
  my %daemon2command = (
    "traffic"           => "traffic",
    "dispatcher.dinfo"  => "dispatcher.tcl deimos dinfo",
    "watch_ccd"         => "watch_ccd",
    "dispatcher2.1"     => "dispatcher2 -s deimot -n 1",
    "dispatcher2.2"     => "dispatcher2 -s deimot -n 2",
    "dispatcher.piezo"  => "dispatcher.tcl deimot piezo",
    "dispatcher.hplog"  => "dispatcher.tcl deimot hplog",
    "dispatcher.barco"  => "dispatcher.tcl deimot barco",
    "dispatcher.bargun" => "dispatcher.tcl deimot bargun",
    "dremel"            => "dremel",
    "monitor.deimos"    => "krul deimos.rul",
    "monitor.deifcs"    => "krul deifcs.rul",
    "monitor.deirot"    => "krul deirot.rul",
    "lickserv2"         => "lickserv2",
    "deirot.cache"      => "deirot.cache",
    "deirot.dispatcher" => "deirot.dispatche",
    "deirot.watchdcs"   => "deirot.watchdcs",
  );

  # reverse lookup (command string => daemon name); the command strings
  # are all distinct, so flipping the pairs loses nothing...
  my %command2daemon = reverse %daemon2command;

  # status of each daemon, initialized to "bad"...
  my %daemon_status = map { ( $_, $X::error ) } keys %daemon2command;

  # collect info from polo first, then from keamano...
  foreach my $status_command ( "deimos status daemons",
                               "rsh keamano deimos status daemons" ) {
    &get_daemon_status( $status_command, \%daemon_status, \%command2daemon);
  }

  # the rotator daemons are reported by a separate command...
  &get_rotator_daemon_status( \%daemon_status);

  # report, one line per daemon in name order...
  foreach my $name ( sort keys %daemon_status ) {
    my $status = $daemon_status{$name};
    print labelize( "Checking $name") . $status . "\n";
    $n_errors++ if $status eq $X::error;
  }
}

#----------------------------------------
# Check applications...
#----------------------------------------
# An application is considered healthy when "get_deimos_pid <name>"
# exits with status zero.

if ( $check{"apps"} ) {

  print "Checking DEIMOS applications:\n";

  foreach my $app ( 'write_image', 'fcstrack', 'ds9relay', 'lickserv' ) {

    print &labelize("Checking $app");

    # guard clause: report the failure and move on to the next one
    if ( system( "get_deimos_pid $app > /dev/null") != 0 ) {
      print "$X::error\n";
      $n_errors++;
      next;
    }

    print "$X::good\n";
  }
}

#----------------------------------------
# Check keywords
#----------------------------------------
# For each service, ask "show" for one or more keywords with all output
# discarded; only the exit status of the command is examined.

if ( $check{"keywords"} ) {

  print "Checking DEIMOS keyword libraries:\n";

  # label => library to query and the keyword(s) to request from it
  my %probe_for = (
    'dispatcher 1'    => { LIBRARY => 'deimot', KEYWORDS => 'tvfilraw' },
    'dispatcher 2'    => { LIBRARY => 'deimot', KEYWORDS => 'g4tltraw' },
    'piezo'           => { LIBRARY => 'deimot', KEYWORDS => 'tmirrraw' },
    'hplogger'        => { LIBRARY => 'deimot', KEYWORDS => 'hplogtim' },
    'bargun'          => { LIBRARY => 'deimot', KEYWORDS => 'slbarcfg' },
    'CCD+infopatcher' => { LIBRARY => 'deiccd',
                           KEYWORDS => 'tempdet wcrate observer' },
    'FCS+infopatcher' => { LIBRARY => 'deifcs',
                           KEYWORDS => 'wcrate observer' },
    'rotator'         => { LIBRARY => 'deirot', KEYWORDS => 'rotatval' },
    'ACS'             => { LIBRARY => 'acs', KEYWORDS => 'mode' },
    'DCS'             => { LIBRARY => 'dcs', KEYWORDS => 'ra' },
  );

  # perform checks in label order...
  foreach my $label ( sort keys %probe_for ) {
    my $probe = $probe_for{$label};

    print &labelize("Checking $label");

    my $show = "/local/kroot/bin/show -s $probe->{LIBRARY} $probe->{KEYWORDS}"
             . " 1>/dev/null 2>&1";

    if ( system($show) != 0 ) {
      print "$X::error\n";
      $n_errors++;
      next;
    }
    print "$X::good\n";
  }
}

#----------------------------------------
# Check settings...
#----------------------------------------
# Each table entry names a keyword to read with "show -terse" and either
# an exact expected string (GOODVALUE) or a numeric range
# (MINVALUE..MAXVALUE).  A failed comparison is reported with the entry's
# BADSTATUS, which decides whether it counts as an error or a warning.
# Entries are tested and reported in the order in which they are listed.

if ( $check{"settings"} ) {

  printf "Checking DEIMOS settings:\n";

  # true when the argument is defined and looks like a decimal number;
  # used so that an empty or garbled reading is never compared as 0...
  my $is_number = sub {
    my( $text) = @_;
    return 0 unless defined($text);
    return ( $text =~ /^\s*[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\s*$/ )
      ? 1 : 0;
  };

  # get the temperature setpoint; it defines the good range for the
  # detector temperature below...
  $command = "/local/kroot/bin/show -s deiccd -terse tempset";
  my($tempset) = `$command`;
  $tempset = '' unless defined($tempset);
  chomp( $tempset);
  my( $tempset_ok) = $is_number->($tempset);

  my( $rotccwlm) = -330;
  my( $rotcwlm) = 402;

  # define keywords to test and the corresponding desired values...
  my( @stage_keywords) = (
    { NAME=>'dewar ion pump',
      LIBRARY=>'deimot',
      KEYWORD=>'ionpump1',
      GOODVALUE=>'on',
      BADSTATUS=>$X::error},
    { NAME=>'vessel ion pump',
      LIBRARY=>'deimot',
      KEYWORD=>'ionpump2',
      GOODVALUE=>'on',
      BADSTATUS=>$X::error},
    { NAME=>'Science CCD 15V Power',
      LIBRARY=>'deiccd',
      KEYWORD=>'UTB15VEN',
      GOODVALUE=>'enabled',
      BADSTATUS=>$X::error},
    { NAME=>'Science CCD 30V Power',
      LIBRARY=>'deiccd',
      KEYWORD=>'UTB30VEN',
      GOODVALUE=>'enabled',
      BADSTATUS=>$X::error},
    { NAME=>'CCD temp setpoint',
      LIBRARY=>'deiccd',
      KEYWORD=>'tempset',
      MINVALUE=>-116,
      MAXVALUE=>-114,
      BADSTATUS=>$X::error},
    # if the setpoint could not be read the limits are left undefined,
    # which the loop below reports with this entry's BADSTATUS...
    { NAME=>'CCD temperature',
      LIBRARY=>'deiccd',
      KEYWORD=>'tempdet',
      MINVALUE=>( $tempset_ok ? $tempset-1 : undef ),
      MAXVALUE=>( $tempset_ok ? $tempset+1 : undef ),
      BADSTATUS=>$X::error},
    { NAME=>'FCS CCD 15V Power',
      LIBRARY=>'deifcs',
      KEYWORD=>'UTB15VEN',
      GOODVALUE=>'enabled',
      BADSTATUS=>$X::error},
    { NAME=>'FCS CCD 30V Power',
      LIBRARY=>'deifcs',
      KEYWORD=>'UTB30VEN',
      GOODVALUE=>'enabled',
      BADSTATUS=>$X::error},
    { NAME=>'Current instrument',
      LIBRARY=>'dcs',
      KEYWORD=>'currinst',
      GOODVALUE=>'DEIMOS',
      BADSTATUS=>$X::warning},
    { NAME=>'Rotator CCW limit',
      LIBRARY=>'dcs',
      KEYWORD=>'rotccwlm',
      MINVALUE=>$rotccwlm-1,
      MAXVALUE=>$rotccwlm+1,
      BADSTATUS=>$X::warning},
    { NAME=>'Rotator CW limit',
      LIBRARY=>'dcs',
      KEYWORD=>'rotcwlm',
      MINVALUE=>$rotcwlm-1,
      MAXVALUE=>$rotcwlm+1,
      BADSTATUS=>$X::warning},
    { NAME=>'FCS lamp',
      LIBRARY=>'deifcs',
      KEYWORD=>'fcscusel',
      GOODVALUE=>'Cu1',
      BADSTATUS=>$X::warning},
    { NAME=>'FCS focus tolerance 1',
      LIBRARY=>'deifcs',
      KEYWORD=>'fcsfoto1',
      MINVALUE=>100,
      MAXVALUE=>1000,
      BADSTATUS=>$X::warning},
    { NAME=>'FCS focus tolerance 2',
      LIBRARY=>'deifcs',
      KEYWORD=>'fcsfoto2',
      MINVALUE=>100,
      MAXVALUE=>1000,
      BADSTATUS=>$X::warning},
  );

  # loop over keywords and perform test...
  foreach my $item ( @stage_keywords ){

    # extract system name from the entry...
    $system = $item->{NAME};
    print &labelize("Checking $system");

    # build and execute command to return keyword value; a command which
    # produced no output at all is treated as an empty reading...
    $command = "/local/kroot/bin/show -s " .
      $item->{LIBRARY} .
      " -terse " .
      $item->{KEYWORD};
    $value = `$command`;
    $value = '' unless defined($value);
    chomp( $value);

    # test the value against the desired...
    $message = '';
    if( defined($item->{GOODVALUE})) {
      if( $value eq $item->{GOODVALUE}){
        $state = $X::good
      } else {
        $state = $item->{BADSTATUS};
        $message = sprintf( "Current value '%s' should be '%s'",
                            $value,
                            $item->{GOODVALUE});
      }
    } elsif( exists($item->{MINVALUE}) and
             exists($item->{MAXVALUE})) {
      if( not ( $is_number->($item->{MINVALUE}) and
                $is_number->($item->{MAXVALUE}) ) ){
        # limits derived from a reading which itself was unusable
        $state = $item->{BADSTATUS};
        $message = "Good range could not be determined";
      } elsif( not $is_number->($value) ){
        # never let an empty/garbled reading be compared as zero
        $state = $item->{BADSTATUS};
        $message = sprintf( "Current value '%s' is not numeric", $value);
      } elsif( $value >= $item->{MINVALUE} and
               $value <= $item->{MAXVALUE}){
        $state = $X::good
      } else {
        $state = $item->{BADSTATUS};
        $message = sprintf( "Current value %s outside of good range %s - %s",
                            $value,
                            $item->{MINVALUE},
                            $item->{MAXVALUE});
      }
    } else {
      $state = $X::warning;
      $message = "No valid comparison defined";
    }

    # tally the outcome; anything other than the three known status
    # strings means a table entry carries a bad BADSTATUS...
    if( $state eq $X::error) {
      $n_errors++;
    } elsif( $state eq $X::warning) {
      $n_warnings++;
    } elsif ($state ne $X::good) {
      die "illegal BADSTATUS value is neither ERROR nor WARNING"
    }

    printf "%s %s\n", $state, $message;
  }
}

#----------------------------------------
# print final status...
#----------------------------------------
# Summarize the run between two ruled lines, then leave with a status
# derived from the counters: the error count when there were errors,
# otherwise the negated warning count when there were warnings.  With
# neither, execution simply runs off the end of the script.
# NOTE(review): a calling shell only sees the low 8 bits of the exit
# status, so a negative value will not appear negative to the caller.

my $rule = "---------------------------------------------------------------\n";

print $rule;
if ( $n_errors != 0 or $n_warnings != 0 ) {
  printf "\t%d errors and $n_warnings warnings were issued.\n", $n_errors
} else {
  print "\tAll tested DEIMOS systems tested appear functional.\n"
}
print $rule;

exit( $n_errors ) if $n_errors > 0;
exit( -$n_warnings ) if $n_warnings > 0;

#-----------------------------------------------------------------------
sub labelize {
#-----------------------------------------------------------------------
# Build the left-hand column of a report line: the text is indented by
# two spaces and padded on the right with dots to a width of 35
# characters.  Text which is already that wide is returned unpadded.
  my( $text) = @_;
  my $width = 35;

  my $label = "  " . $text;

  # number of dots still needed to reach the column width
  my $shortfall = $width - length($label);
  $label .= "." x $shortfall if $shortfall > 0;

  return $label;
}

#-----------------------------------------------------------------------
sub get_daemon_status {
#-----------------------------------------------------------------------
# Run a status command and mark every daemon it reports as running.
#
# Arguments:
#   $status_command = shell command whose output is scanned
#   $daemon_status  = reference to hash (daemon name => status string);
#                     the entry of each daemon found running is set to
#                     $X::good, all other entries are left alone
#   $command2daemon = reference to hash translating the text in front of
#                     " is running" into a daemon name
#
# Only lines of the form "<command> is running ... pid = <digits>" are
# considered.  A matching line whose command has no translation produces
# a warning and is skipped.  If the command cannot be started at all a
# warning is issued and the status hash is left untouched.
  my( $status_command, $daemon_status, $command2daemon) = @_;
  my( $command, $daemon_name, $line);

  # keep the bareword handle private to this call so that it cannot
  # disturb a STATUS handle belonging to anybody else...
  local *STATUS;

  unless ( open STATUS, "$status_command |" ) {
    warn "unable to run '$status_command': $!\n";
    return;
  }

  # read into a named variable so the caller's $_ is not clobbered...
  while ( defined( $line = <STATUS> ) ) {
    if ( $line =~ m/^(.*) is running .* pid = \d+$/ ) {

      # extract command from matching string...
      $command = $1;

      # check for undefined command...
      unless( defined($$command2daemon{$command})) {
        warn "unknown daemon command $command";
        next;
      }

      # set status...
      $daemon_name = $$command2daemon{$command};
      $$daemon_status{$daemon_name} = $X::good;
    }
  }
  close STATUS;
}

#-----------------------------------------------------------------------
sub get_rotator_daemon_status {
#-----------------------------------------------------------------------
# Scan the rotator status report and mark the three deirot daemons as
# good when the expected lines are present.
#
# Arguments:
#   $daemon_status  = reference to hash (daemon name => status string);
#                     the "deirot.cache", "deirot.dispatcher" and
#                     "deirot.watchdcs" entries are set to $X::good when
#                     their checks pass and are otherwise left alone
#   $status_command = (optional) shell command producing the report;
#                     defaults to "rsh keamano deirot status"
#
# For every daemon two kinds of lines are counted: element [0] counts the
# "... is running" summary lines and element [1] counts the per-process
# lines naming the executable.  If the command cannot be started a
# warning is issued and the status hash is left untouched.
  my( $daemon_status, $status_command ) = @_;
  $status_command = "rsh keamano deirot status"
    unless defined($status_command);

  # start every counter at zero so that a line type which never shows up
  # compares as 0 rather than as an undefined value...
  my( @deirot_cache)      = (0, 0);
  my( @deirot_dispatcher) = (0, 0);
  my( @deirot_watchdcs)   = (0, 0);
  my( $line);

  # keep the bareword handle private to this call...
  local *STATUS;

  unless ( open STATUS, "$status_command |" ) {
    warn "unable to run '$status_command': $!\n";
    return;
  }

  while ( defined( $line = <STATUS> ) ) {

    # deirot.cache:
    # a) there is only one cache_daemon process;
    # b) the exact file argument is deliberately not checked;
    # c) the process state character is deliberately not required to be
    #    "S"; any single character is accepted.

    if ( $line =~ m|^deirot.cache.* is running| ) {
      $deirot_cache[0]++
    }
    if ( $line =~ m|^deirot.cache.* . .* /kroot/bin/keyword_cache_daemon| ) {
      $deirot_cache[1]++
    }

    # deirot.dispatcher:
    # a) at least 3 processes compose deirot.dispatcher (the main
    #    process, a co-process handling the MUSIC messages and a
    #    co-process sending status back to DCS); a 4th can rarely be
    #    present if an audio player is running;
    # b) the arguments are deliberately not checked;
    # c) the state character is deliberately not required to be "R" for
    #    the main dispatcher and "S" for the others.

    if ( $line =~ m|^deirot.dispatche.* is running| ) {
      $deirot_dispatcher[0]++
    }
    if ( $line =~ m|^deirot.dispatche.* . .* /kroot/bin/deirotd| ){
      $deirot_dispatcher[1]++
    }

    # deirot.watchdcs is a threaded program; exactly 4 threads are
    # expected, each showing up under a separate process id.

    if ( $line =~ m|^deirot.watchdcs.* is running| ) {
      $deirot_watchdcs[0]++
    }
    if ( $line =~ m|^deirot.watchdcs.* /kroot/bin/watch_keywords| ) {
      $deirot_watchdcs[1]++
    }

  }
  close STATUS;

  # verify that we got what we expected...
  if ( $deirot_cache[0]==1 and $deirot_cache[1]==1 ){
    $$daemon_status{"deirot.cache"} = $X::good
  }

  if ( $deirot_dispatcher[0]==1 and $deirot_dispatcher[1]>=3 ){
    $$daemon_status{"deirot.dispatcher"} = $X::good
  }

  if( $deirot_watchdcs[0]==1 and $deirot_watchdcs[1]==4 ){
    $$daemon_status{"deirot.watchdcs"} = $X::good
  }
}