#!/usr/bin/perl -w
#+
# testAll -- check functionality of all OSIRIS systems
#
# Purpose:
#	Test all of the OSIRIS systems to determine whether they are
#	working properly.  The following checks are made:
#	- Check that all devices are pingable
#	- Check free space on the data disk
#	- Check that all servers are running
#	- Check that daemons are alive
#	- Check that AC power is on to all stages (except lamps,
#		which should be OFF)
#	- Check that all stages are reporting OK and not locked
#	- Check critical environment settings
#
# Usage:
#	testAll [-h] [-m] [systems]
#
# Arguments:
#	systems = [computers disks servers power settings stages]
#		Systems which should be tested.  Default is all of them.
#
# Options:
#	-h = print help
#	-m = pass output to less
#
# Output:
#	Report written to stdout
#
# Exit values:
#	 0 = normal completion
#	>0 = number of errors
#	The script calls exit with minus the number of warnings when
#	there are warnings but no errors; exit statuses are 8 bits wide,
#	so on POSIX systems the caller sees 256 minus that count
#	(e.g. 255 for a single warning).
#
# Example:
#	1) To check instrument status:
#		testAll
#
# Nominal output:
#-
# Modification history:
#	2018-feb-06	jlyke	Adapted for OSIRIS from MOSFIRE's testAll
#	2020-jun-30	jlyke	updates for computer upgrades
#-----------------------------------------------------------------------

use strict;
use warnings;
use File::Basename;
use Getopt::Std;

# unbuffered output, so our own text stays in order with the output of
# the external commands we run...
$| = 1;

# misc definitions...
$X::instrument = 'OSIRIS';
$X::default = 'DEFAULT';
($X::good, $X::warning, $X::error) = ("OK", "WARNING!", "ERROR!");

my( $n_errors) = 0;		# number of errors
my( $n_warnings) = 0;		# number of warnings

# file-scope scratch variables shared by the checks and subroutines
# below (kept declared here so that code using them compiles under
# "use strict")...
my( $key, $value);
my( $type, $pid);
my( $server_name);
my( $computer);
my( $disk);
my( @command);
my( $system);
my( $command);
my( $state, $message);
our( %options);

# allowed deviation of a detector temperature from its target keyword
# (used by the "settings" check)...
my( $thresh) = 0.1;

# the only host from which the terminal server and switches are pinged...
my( $goodhost) = 'napili';

# define all possible systems to test...
#my( @systems) = qw( computers servers power settings daemons stages);
my( @systems) = qw( computers disks servers power settings stages);

# define usage (basename() avoids spawning a shell and copes with
# spaces in the script path)...
my $cmd = basename( $0);
my $usage = "Usage: $cmd [" . join( " ", @systems) . "]";

# parse flags...
getopts('hm', \%options) or die "$usage\n";
die "$usage\n" if $options{'h'};

# default state for each system is "off"...
my( %check) = map { ($_ => 0) } @systems;

# remaining arguments name the systems to test; reject unknown names
# before doing any work...
my( @requested) = @ARGV;
@ARGV = ();
foreach my $name ( @requested ) {
    die "$usage\n" unless exists $check{$name};
}

# -m: re-run ourselves through the pager.  The requested systems are
# passed along so that "testAll -m power" still tests only power; the
# names were validated against @systems above, so they are safe to hand
# to the shell.
if ( $options{'m'} ) {
    exec( "$0 @requested | less") or die "$cmd: cannot start pager: $!\n";
}

if ( @requested ) {
    # if args are specified, turn on certain systems...
    $check{$_} = 1 foreach @requested;
}
else {
    # default: check all systems (an "apps" entry, should one ever be
    # added to @systems, stays off unless requested explicitly)...
    $check{$_} = 1 foreach grep { !/apps/ } @systems;
}

# verify host...
#my( $goodhost) = 'napili';
#if( $ENV{'HOST'} ne $goodhost ){
#  die "ERROR: you can only run $cmd on $goodhost\n"
#}

#----------------------------------------
# Check computers
#----------------------------------------
if ( $check{"computers"} ) {

    # see whether each device answers a ping...
    print "Checking $X::instrument computers:\n";

    my( %computer_description) = (
        'napili'          => 'OSIRIS host',
        'puunoa'          => 'SPEC detector target',
        'kuiaha'          => 'IMAG detector target',
        'osiris-control1' => 'SPEC sidecar PC',
        'osiris-control2' => 'IMAG sidecar PC',
        'osiris-drp'      => 'DRP machine',
    );

    # it seems only $goodhost can see switches and terminal server
    # (HOST may be absent from the environment, hence the defined test)
    my $host = defined( $ENV{'HOST'}) ? $ENV{'HOST'} : '';
    if ( $host eq $goodhost ) {
        $computer_description{'osrsterm'} = 'terminal server';
        $computer_description{'osrssw1'} = 'network switch in cabinet';
        $computer_description{'osrssw2'} = 'network switch in computer room';
    }
    # $computer_description{'pauoa'} = 'hot spare for osirisserver';
    # $computer_description{'osiris-odrp'} = 'spare DRP machine';

    foreach my $computer ( sort keys %computer_description ) {
        print labelize( " $computer");
        # if Solaris: "ping $computer 1 > /dev/null"
        if ( system( "ping -c 1 $computer > /dev/null") == 0 ) {
            print "$X::good (" . $computer_description{$computer} . ")\n";
        }
        else {
            print "$X::error (" . $computer_description{$computer} . ")\n";
            $n_errors++;
        }
    }
}

#----------------------------------------
# Check disks
#----------------------------------------
if ( $check{"disks"} ) {

    # see whether the disks have room left...
    print "Checking $X::instrument disks:\n";

    my $max_disk_pcent = 97;	# report an error at or above this % used

    my( %disk_description);
    my $datadisk = $ENV{'OSIRIS_DATA_ROOT'};
    if ( defined( $datadisk) and length( $datadisk) ) {
        $disk_description{$datadisk} = 'Data disk';
    }
    else {
        # without the variable there is no disk to inspect; say so
        # rather than running df on an empty path
        print labelize( " data disk");
        print "$X::error (OSIRIS_DATA_ROOT is not set)\n";
        $n_errors++;
    }

    foreach my $disk ( sort keys %disk_description ) {
        print labelize( " $disk");

        # one df call gives both columns; the last output line holds the
        # values (the first is the header)
        my @df_lines = `df -h --output=avail,pcent "$disk"`;
        my $df_line = @df_lines ? $df_lines[-1] : '';

        if ( $df_line =~ /^\s*(\S+)\s+(\d+)%\s*$/ ) {
            my( $disk_avail, $disk_pcent) = ( $1, $2);
            my $message = sprintf( "Avail: %s, Capacity: %s%%",
                                   $disk_avail, $disk_pcent);
            if ( $disk_pcent < $max_disk_pcent ) {
                print "$X::good (" . $message . ")\n";
            }
            else {
                print "$X::error (" . $message . ")\n";
                $n_errors++;
            }
        }
        else {
            # df failed or printed something unexpected: never report
            # this as OK
            print "$X::error (cannot determine disk usage)\n";
            $n_errors++;
        }
    }
}

#----------------------------------------
# Check servers
#----------------------------------------
if ( $check{"servers"} ) {

    # see whether programs are running...
    print "Checking $X::instrument keyword servers:\n";

    # define servers...
    # required are required for instrument to run
    # extra are used for monitoring
    my( %required_server_description) = (
        'osiris' => 'global server',
        'osds'   => 'SPEC detector',
        'oids'   => 'IMAG detector',
        'om1s'   => 'SPEC collimator wheel',
        'om2s'   => 'SPEC filter wheel',
        'om3s'   => 'SPEC camera wheel',
        'om4s'   => 'SPEC lenslet mask',
        'om5s'   => 'IMAG filter wheel 1',
        'om6s'   => 'IMAG filter wheel 2',
        'op1s'   => 'power 1',
        'op2s'   => 'power 2',
        'oprs'   => 'dewar vacuum pressure monitor',
        'ot1s'   => 'dewar temperature monitor',
        'ot2s'   => 'electronics temperature monitor',
        'otcs'   => 'temperature control'
    );
    my( %extra_server_description) = (
        'osirishistory' => 'keyword history',
        'osirismon'     => 'system monitor',
        'osirisproc1'   => 'process monitor 1',
        'osirisproc2'   => 'process monitor 2'
    );

    # keyword read from each server; servers not listed here are probed
    # with the default keyword...
    my $default_keyword = "lastalive";
    my( %server_keyword) = (
        'osirishistory' => 'uptime',
        'osirismon'     => 'disp1sta',
        'osirisproc1'   => 'disp1sta',
        'osirisproc2'   => 'disp2sta'
    );

    # compute an alphabetized list with global server first..
    my( %server_description) = ( %required_server_description,
                                 %extra_server_description);
    my @servers = grep( $_ ne 'osiris', sort keys %server_description);
    unshift @servers, 'osiris';

    foreach my $server ( @servers ) {

        my $keyword = defined( $server_keyword{$server})
                    ? $server_keyword{$server}
                    : $default_keyword;

        # special because there are two separate processes, but one service
        my $service = ( $server eq "osirisproc1" || $server eq "osirisproc2")
                    ? 'osirisproc'
                    : $server;

        print labelize( " $server");
        if ( system( "show -s $service $keyword > /dev/null") == 0 ) {
            print "$X::good (" . $server_description{$server} . ")\n";
        }
        elsif ( defined( $required_server_description{$server}) ) {
            # a required server that does not answer is an error...
            print "$X::error (" . $server_description{$server} . ")\n";
            $n_errors++;
        }
        else {
            # ...a monitoring-only server is just a warning
            print "$X::warning (" . $server_description{$server} . ")\n";
            $n_warnings++;
        }
    }
}

#----------------------------------------
# Check applications...
# ("apps" is not listed in @systems, so this check cannot currently be
# selected)
#----------------------------------------
if ( $check{"apps"} ) {

    # see whether programs are running...
    print "Checking $X::instrument applications:\n";

    foreach my $item ( qw(OOPGUI ODEC OSGUI OTGUI OORGUI) ) {
        print labelize( " $item");
        if ( system( "get_osiris_pid $item > /dev/null") == 0 ) {
            print "$X::good\n";
        }
        else {
            print "$X::error\n";
            $n_errors++;
        }
    }
}

#----------------------------------------
# Check daemons...
# ("daemons" is not listed in @systems, so this check cannot currently
# be selected)
#----------------------------------------
if ( $check{"daemons"} ) {

    # see whether programs are running...
    print "Checking $X::instrument daemons: \n";

    foreach my $item ( qw( watchrot watchslew watchfcs autodisplay ) ) {
        print labelize( " $item");
        my $value = showKeyword( 'osiris', "${item}ok");
        if ( looks_like_number( $value) and $value == 1 ) {
            print "$X::good\n";
        }
        else {
            print "$X::warning : Ignore for afternoon checkout.\n";
            $n_warnings++;
        }
    }

    # check the darenabl keyword
    print labelize( " DAR correction");
    my $value = showKeyword( 'osiris', 'darenabl');
    if ( looks_like_number( $value) and $value == 1 ) {
        print "$X::good\n";
    }
    else {
        print "$X::warning : Ignore if not on-sky and guiding\n";
        $n_warnings++;
    }
}

#----------------------------------------
# Check power...
#----------------------------------------
if ( $check{"power"} ) {

    # define keywords to test and the corresponding desired values (GOODVALUE)...
    my( @stage_keywords) = (

        # power strip 1 pos 1 UNUSED
        # power strip 1 pos 2 UNUSED
        # power strip 1 pos 3 UNUSED

        # power strip 1 pos 4
        { NAME=>'IMAG SAM', LIBRARY=>'op1s', KEYWORD=>'pwstat4',
          GOODVALUE=>'1', BADSTATUS=>$X::error},

        # power strip 1 pos 5
        { NAME=>'SPEC SAM', LIBRARY=>'op1s', KEYWORD=>'pwstat5',
          GOODVALUE=>'1', BADSTATUS=>$X::error},

        # power strip 1 pos 6
        { NAME=>'IMAG PC', LIBRARY=>'op1s', KEYWORD=>'pwstat6',
          GOODVALUE=>'1', BADSTATUS=>$X::error},

        # power strip 1 pos 7
        { NAME=>'SPEC PC', LIBRARY=>'op1s', KEYWORD=>'pwstat7',
          GOODVALUE=>'1', BADSTATUS=>$X::warning},

        # power strip 1 pos 8
        { NAME=>'EC Cooling System', LIBRARY=>'op1s', KEYWORD=>'pwstat8',
          GOODVALUE=>'1', BADSTATUS=>$X::warning},

        # power strip 2 pos 1
        { NAME=>'Pressure Gauge', LIBRARY=>'op2s', KEYWORD=>'pwstat1',
          GOODVALUE=>'1', BADSTATUS=>$X::error},

        # power strip 2 pos 2
        { NAME=>'Lakeshore 340', LIBRARY=>'op2s', KEYWORD=>'pwstat2',
          GOODVALUE=>'1', BADSTATUS=>$X::error},

        # power strip 2 pos 3
        { NAME=>'Dewar Lakeshore 218', LIBRARY=>'op2s', KEYWORD=>'pwstat3',
          GOODVALUE=>'1', BADSTATUS=>$X::error},

        # power strip 2 pos 4
        { NAME=>'Cabinet Lakeshore 218', LIBRARY=>'op2s', KEYWORD=>'pwstat4',
          GOODVALUE=>'1', BADSTATUS=>$X::error},

        # power strip 2 pos 5
        { NAME=>'Motor Controllers', LIBRARY=>'op2s', KEYWORD=>'pwstat5',
          GOODVALUE=>'1', BADSTATUS=>$X::error},

        # power strip 2 pos 6
        { NAME=>'Terminal Server', LIBRARY=>'op2s', KEYWORD=>'pwstat6',
          GOODVALUE=>'1', BADSTATUS=>$X::error},

        # power strip 2 pos 7 UNUSED

        # power strip 2 pos 8
        { NAME=>'EC Cooling System', LIBRARY=>'op2s', KEYWORD=>'pwstat8',
          GOODVALUE=>'1', BADSTATUS=>$X::error},
    );

    reportStage( "power", \@stage_keywords);
}

#----------------------------------------
# Check stages...
#----------------------------------------
if ( $check{"stages"} ) {

    # define keywords to test and the corresponding desired values (GOODVALUE)...
    my( @stage_keywords) = (

        # SPEC Collimator Wheel
        { NAME=>'SPEC Collimator', LIBRARY=>'om1s', KEYWORD=>'status',
          GOODVALUE=>'OK', LOCKED=>'lockall', BADSTATUS=>$X::warning},

        # SPEC Filter
        { NAME=>'SPEC Filter Wheel', LIBRARY=>'om2s', KEYWORD=>'status',
          GOODVALUE=>'OK', LOCKED=>'lockall', BADSTATUS=>$X::warning},

        # SPEC Camera Wheel
        { NAME=>'SPEC Camera', LIBRARY=>'om3s', KEYWORD=>'status',
          GOODVALUE=>'OK', LOCKED=>'lockall', BADSTATUS=>$X::warning},

        # SPEC Lenslet Mask
        { NAME=>'SPEC Lenslet Mask', LIBRARY=>'om4s', KEYWORD=>'status',
          GOODVALUE=>'OK', LOCKED=>'lockall', BADSTATUS=>$X::warning},

        # IMAG Filter Wheel #1
        { NAME=>'IMAG Filter #1', LIBRARY=>'om5s', KEYWORD=>'status',
          GOODVALUE=>'OK', LOCKED=>'lockall', BADSTATUS=>$X::warning},

        # IMAG Filter Wheel #2
        { NAME=>'IMAG Filter #2', LIBRARY=>'om6s', KEYWORD=>'status',
          # GOODVALUE=>'OK|Tracking',	# two possible values are OK here
          GOODVALUE=>'OK',
          LOCKED=>'lockall', BADSTATUS=>$X::warning},
    );

    reportStage( "stages", \@stage_keywords);
}

#----------------------------------------
# Check settings...
#----------------------------------------
if ( $check{"settings"} ) {

    # Read the limits BEFORE building the records.  Backticks written
    # directly inside the { ... } constructors run in list context: no
    # output yields an empty list and shifts every following key/value
    # pair, and any value that is returned keeps its trailing newline.
    # A limit that cannot be read is left undefined, which reportStage
    # reports as "no valid comparison" instead of testing against a
    # bogus range around zero.
    my $spec_target = numericKeyword( 'otcs', 'trgtmp1');
    my $imag_target = numericKeyword( 'otcs', 'trgtmp2');

    # the pressure range keyword is parsed as "...[min,max]..."; keep the
    # text between the first "[" and the next bracket, then split on ","
    my $range = showKeyword( 'oprs', 'pressrng');
    $range =~ s/^[^\[]*\[//;
    $range =~ s/[\[\]].*$//s;
    my( $pressure_min, $pressure_max) = split( /,/, $range);
    foreach my $limit ( $pressure_min, $pressure_max ) {
        $limit =~ s/^\s+|\s+$//g if defined( $limit);
        $limit = undef unless defined( $limit) and looks_like_number( $limit);
    }

    # define keywords to test and the corresponding desired values...
    my( @stage_keywords) = (

        # detector temp...
        { NAME=>"SPEC detector temp", LIBRARY=>'otcs', KEYWORD=>'tmp1',
          MINVALUE=>( defined( $spec_target) ? $spec_target - $thresh : undef),
          MAXVALUE=>( defined( $spec_target) ? $spec_target + $thresh : undef),
          BADSTATUS=>$X::warning},

        # detector temp...
        { NAME=>"IMAG detector temp", LIBRARY=>'otcs', KEYWORD=>'tmp2',
          MINVALUE=>( defined( $imag_target) ? $imag_target - $thresh : undef),
          MAXVALUE=>( defined( $imag_target) ? $imag_target + $thresh : undef),
          BADSTATUS=>$X::warning},

        # CCR state...
        { NAME=>"CCR Head", LIBRARY=>'ot1s', KEYWORD=>'tmp1',
          MINVALUE=>38.0, MAXVALUE=>45.0, BADSTATUS=>$X::error},

        # Dewar pressure
        { NAME=>'dewar pressure', LIBRARY=>'oprs', KEYWORD=>'pressure',
          MINVALUE=>$pressure_min, MAXVALUE=>$pressure_max,
          BADSTATUS=>$X::error},

        # datataking system status...
        { NAME=>'SPEC ready to expose', LIBRARY=>'osds', KEYWORD=>'ready',
          GOODVALUE=>'1', BADSTATUS=>$X::warning},

        # datataking system status...
        { NAME=>'IMAG ready to expose', LIBRARY=>'oids', KEYWORD=>'ready',
          GOODVALUE=>'1', BADSTATUS=>$X::warning},

        # current instrument check...
        { NAME=>'current instrument', LIBRARY=>'dcs', KEYWORD=>'currinst',
          GOODVALUE=>$X::instrument, BADSTATUS=>$X::warning}
    );

    reportStage( "settings", \@stage_keywords);
}

#----------------------------------------
# helpers for the checks above...
#----------------------------------------
use Scalar::Util qw(looks_like_number);

# showKeyword( $library, $keyword)
#	Run "show -s $library -terse $keyword" and return its output as a
#	single string with surrounding whitespace removed.  Returns ''
#	when the command produces no output or cannot be started.
sub showKeyword {
    my( $library, $keyword) = @_;
    my $value = `show -s $library -terse $keyword`;
    return '' unless defined( $value);
    $value =~ s/^\s+|\s+$//g;
    return $value;
}

# numericKeyword( $library, $keyword)
#	Like showKeyword, but returns undef unless the value is a number.
#	Always returns exactly one scalar, so it is safe to call inside a
#	list.
sub numericKeyword {
    my $value = showKeyword( @_);
    return looks_like_number( $value) ? $value : undef;
}

#----------------------------------------
# print final status...
#----------------------------------------
print "---------------------------------------------------------------\n";
if ( $n_errors == 0 and $n_warnings == 0 ) {
    print "\tAll tested $X::instrument systems appear functional.\n";
}
else {
    printf "\t%d errors and %d warnings were issued.\n",
        $n_errors, $n_warnings;
}
print "---------------------------------------------------------------\n";

# Exit status: the number of errors if there were any, otherwise minus
# the number of warnings.  Exit statuses are 8 bits wide, so on POSIX
# systems a caller sees the negative value as 256 minus the warning
# count (e.g. 255 for one warning).  Falling off the end of the script
# gives 0.
if ( $n_errors > 0 ) {
    exit $n_errors;
}
elsif ( $n_warnings > 0 ) {
    exit -$n_warnings;
}

use Scalar::Util qw(looks_like_number);

#-----------------------------------------------------------------------
sub labelize {
#-----------------------------------------------------------------------
# Build the left-hand column of a report line.
#
# Argument:
#	$string = text of the label
#
# Returns:
#	$string preceded by one space and padded on the right with "."
#	to a width of 30 characters; longer labels are returned unpadded.
#-----------------------------------------------------------------------
    my( $string) = @_;
    my $size = 30;

    my $label = " $string";
    my $missing = $size - length( $label);
    $label .= "." x $missing if $missing > 0;
    return $label;
}

#-----------------------------------------------------------------------
sub reportStage {
#-----------------------------------------------------------------------
# Test a list of keywords and print one report line per keyword.
#
# Arguments:
#	$system         = name of the group being tested (heading only)
#	$stage_keywords = reference to an array of hash references with
#		NAME       label to print; if it equals $X::default the
#		           label is read from keyword "<KEYWORD>loc"
#		LIBRARY    service passed to "show -s"
#		KEYWORD    keyword whose value is tested
#		GOODVALUE  acceptable value(s), separated by "|"
#		MINVALUE,
#		MAXVALUE   inclusive numeric range, used when GOODVALUE is
#		           not defined
#		LOCKED     optional keyword which must read '0'
#		BADSTATUS  $X::warning or $X::error, reported on failure
#
# Side effects:
#	Prints to stdout and increments the file-level counters
#	$n_errors / $n_warnings.  Dies if a failing record has a
#	BADSTATUS which is neither $X::error nor $X::warning.
#-----------------------------------------------------------------------
    my( $system, $stage_keywords) = @_;

    print "Checking $X::instrument $system:\n";

    # loop over keywords and perform test...
    foreach my $record ( @$stage_keywords ) {

        my $name    = $record->{NAME};
        my $library = $record->{LIBRARY};
        my $keyword = $record->{KEYWORD};

        # resolve default name...
        if ( $name eq $X::default ) {
            $name = `show -s $library -terse ${keyword}loc`;
            $name = '' unless defined( $name);
            chomp( $name);
        }
        print labelize( " $name");

        # read the current keyword value (undef means the command could
        # not be started; treat that like empty output)...
        my $value = `show -s $library -terse $keyword`;
        $value = '' unless defined( $value);
        chomp( $value);

        # test the value against the desired...
        my $state;
        my $message = '';
        if ( defined( $record->{GOODVALUE}) ) {

            # split string into potentially multiple good values and
            # compare the current value to each of them...
            my @goodvalues = split( /\|/, $record->{GOODVALUE});
            $state = $record->{BADSTATUS};
            foreach my $v ( @goodvalues ) {
                $state = $X::good if $value eq $v;
            }

            # add appropriate message if not good...
            if ( $state ne $X::good ) {
                my $wanted = q(') . join( q(' or '), @goodvalues ) . q(');
                $message = sprintf( "Current: '%s', want: %s",
                                    $value, $wanted);
                if ( $name eq "FCS correcting" ) {
                    $message = "$message. Ignore for afternoon checkout.";
                }
            }

        }
        elsif ( defined( $record->{MINVALUE}) and
                defined( $record->{MAXVALUE}) ) {

            # limits taken straight from command output may carry a
            # trailing newline; trim copies before testing and printing
            my( $min, $max) = ( $record->{MINVALUE}, $record->{MAXVALUE});
            s/^\s+|\s+$//g foreach ( $min, $max);

            if ( !looks_like_number( $min) or !looks_like_number( $max) ) {
                $state = $record->{BADSTATUS};
                $message = sprintf(
                    "Current: '%s', good range unavailable ('%s' - '%s')",
                    $value, $min, $max);
            }
            elsif ( !looks_like_number( $value) ) {
                # An empty or non-numeric reading (e.g. server not
                # answering) would compare as 0 and could fall inside
                # the range, so it is rejected explicitly.
                $state = $record->{BADSTATUS};
                $message = sprintf(
                    "Current: '%s' is not a number, good range %s - %s",
                    $value, $min, $max);
            }
            elsif ( $value >= $min and $value <= $max ) {
                $state = $X::good;
            }
            else {
                $state = $record->{BADSTATUS};
                $message = sprintf(
                    "Current: %s outside of good range %s - %s",
                    $value, $min, $max);
            }

        }
        else {
            $state = $X::warning;
            $message = "No valid comparison defined";
        }

        # optional lock check; a locked stage overrides the result and
        # message of the value test above...
        if ( defined( $record->{LOCKED}) ) {

            my $lock = `show -s $library -terse $record->{LOCKED}`;
            $lock = '' unless defined( $lock);
            chomp( $lock);

            if ( $lock ne '0' ) {
                $state = $record->{BADSTATUS};
                $message = 'stage is locked out';

                # implemented an ignore statement if tested during
                # afternoon checkout.
                if ( $name eq "dust cover" ) {
                    $message = 'stage is locked out. Ignore for afternoon checkout.';
                }
            }
        }

        # tally the result...
        if ( $state eq $X::error ) {
            $n_errors++;
        }
        elsif ( $state eq $X::warning ) {
            $n_warnings++;
        }
        elsif ( $state ne $X::good ) {
            die "illegal BADSTATUS value is neither ERROR nor WARNING";
        }

        printf "%s %s\n", $state, $message;
    }
}