/*****************************************************************************
 * PROJECT: Xavier
 *
 * (c) Copyright 1994 Joseph O'Sullivan. All rights reserved.
 *
 * FILE: nannyUtils.c
 *
 * ABSTRACT:
 *  Sleeps waiting for processes to connect via a socket. When a process 
 *  connects, the connecting process describes one of...
 *   a) what program to run on this machine. we then run said program.
 *   b) what program to kill on this machine. we then try to kill said program
 *   c) ???
 * 
 * The file is divided up into several logical groupings. Search for the
 * following headers to find the functions relating to each grouping
 * 
 * !!Checking processes (ps aux stuff)
 * !!Killing of processes
 * !!Utility functions for running of processes
 * !!Scheduling/Running of processes
 * !!pseudoTerminals
 * !!child management
 * !!messagePassing - wrapper functions which check for closed connections 
 * !!unused
 *
 * $Source: /afs/cs.cmu.edu/project/TCA/Master/tcaV8/tools/nanny/nannyUtils.c,v $
 * $Revision: 1.12 $
 * $Date: 1996/07/29 05:03:10 $
 * $Author: josullvn $
 *
 * REVISION HISTORY:
 *
 * $Log: nannyUtils.c,v $
 * Revision 1.12  1996/07/29  05:03:10  josullvn
 * Bloody hell. This commiting is a bit different than under Xavier. Short
 * story is cleaned up some purify bugs, and also made changes to nanny
 * which should make it a bit better - Improving performance over multiple
 * machines, explict quietening of nondisplayed processes, replacing of
 * runConsole with xfMiniConsole, which is multithreaded, vt102 compilant,
 * adds a uniform emacs-like command line editing feature, better on small
 * screens and otherwise fab.
 *
 * Revision 1.11  1996/07/25  22:25:26  rich
 * Fixed uninitialized memory references.
 *
 * Revision 1.10  1996/06/28  14:07:27  reids
 * Fixed quite a few bugs -- with graphics, interaction with script, and
 *   killing processes
 *
 * Revision 1.9  1996/02/20  11:40:38  josullvn
 * Made extensive changes:
 *    added xfRunConsole, an xforms based interface which was quick
 *    To change the interface, from the nanny dir run
 *    /afs/cs/user/josullvn/xforms/DESIGN/fdesign xfConsole
 *    to get going, and made debugging easier.
 *    added tons of comments.
 *    Debug spews a lot more info back, so just compile
 *    with DEBUG=DEBUG if you really need it.
 *    I've added some timeouts to the code, which will
 *    probably be tweaked later. There are in xfCallbacks.c
 *    Its become better at keeping processes going, and
 *    so its harder to quit. Using shutdown can be
 *    painful. Killing the processes individually is
 *    probably better. Need to do a reverse dependancy when quitting.
 *    Multiple machines are again poor. It doesn't wait sufficently
 *    for dependencies - so you need to run things explicitly...
 *
 * Revision 1.8  1996/02/18  21:17:48  reids
 * Put script stuff back in;
 *   Try to fix pseudo-terminal handling for UNIX and LINUX;
 *   Resize dialog windows when text gets too long.
 *
 * Revision 1.7  1996/02/13  21:30:00  rich
 * Try using setpgid for linux.
 *
 * Revision 1.6  1996/02/12  00:13:02  rich
 * Have to use O_ for old tty constants.
 *
 * Revision 1.5  1996/02/11  21:35:20  rich
 * Updated GNUmakefiles for faster complilation.  Use FAST_COMPILE=1 for
 * routine recompiles.
 *
 * Revision 1.4  1996/02/10  16:52:20  rich
 * Made private functions static and fixed some forward declarations.
 *
 * Revision 1.3  1996/01/22  21:31:01  reids
 * Fixed the way stdout is handled, using pseudo-terminals, so that it preserves
 *   the line-buffering mode of the real TTY.
 * Added support for running scripts (not really integrated, yet, but initial
 *   tests are working).
 *
 * Revision 1.2  1996/01/05  16:33:33  rich
 * Nanny fixes.
 *
 * Revision 1.1  1995/12/17  20:25:50  rich
 * Moved Nanny to the tca release.
 *
 * Revision 1.39  1995/09/29  05:58:08  rich
 * Added prototypes.
 *
 * Revision 1.38  1995/09/28  19:32:02  josullvn
 * Lots o changes
 *
 * Revision 1.37  1995/09/20  01:58:44  rich
 * Reduced the number of libraries needed.
 * Cleaned up warnings.
 *
 * Revision 1.36  1995/08/14  22:40:49  rich
 * Changes for the new functional devUtils (8.1.6).
 * "-clean" option on nanny now prompts before killing tasks, unless the
 * "-noprompt" flag is used.
 *
 * Revision 1.35  1995/08/06  00:05:15  rich
 * Changes for new devUtils in tca-8.1.
 *
 * Revision 1.34  1995/07/30  02:01:19  rich
 * Moved common defines to etc/GNUmakefile.defs.
 * Added loadRobotConfig.
 *
 * Revision 1.33  1995/07/21  15:13:20  josullvn
 * Added a restarts option - see the README. updated some .rc files to give
 * examples. Also fixed conflicts to that runConsole is notified about dead
 * processes.
 *
 * Revision 1.32  1995/07/17  04:40:59  josullvn
 * Altered my handling of kills from calling system to using clib kill function
 *
 * Revision 1.31  1995/07/15  07:13:15  josullvn
 * ProcessDevices is _not_ reentrant - this was causing lots of problems as it
 * turns out. Restructing my blocking code fixed it.
 * Also, added a notion of dead - that is telling when something has
 * finally died.
 *
 * Revision 1.30  1995/07/13  14:04:52  robocomp
 * Linux specific changes. ps under linux is 1 character longer, and occasionally
 * writes to stderr. Also, added a linux simulator due to the different paths
 *
 * Revision 1.29  1995/07/13  11:09:49  josullvn
 * Two bugs were present. a) syscalls were being interrupted, and b) the
 * processes button management got screwy after a while. The first is due
 * to Devutils, second fixed by removing a reliance on xclient data. Also
 * added features to newProcess whereby it is now menu driven.
 * Added tca.rc, removing those processes from the Simulator resource files.
 *
 * Revision 1.28  1995/07/12  02:46:32  josullvn
 * Test the result of message passing commands for failure
 *
 * Revision 1.27  1995/07/11  23:56:49  josullvn
 * Added some debugging info to COnnectionNoting
 *
 * Revision 1.26  1995/07/11  12:10:27  josullvn
 * Needed Execing and Killing to wait for remote dependancies properly.
 *
 * Revision 1.25  1995/07/11  11:49:19  josullvn
 * Needed to notify any new nanny immediately upon noticing them.
 * Otherwise, run into trouble during exec...
 *
 * Revision 1.24  1995/07/11  11:21:36  josullvn
 * A lot more changes to message passing to handle multiple nannys better.
 * This basically involved removing fdclient, and being smarter about what
 * nannys are in the system
 *
 * Revision 1.23  1995/07/11  08:42:05  josullvn
 * Further refinements to -clean option.
 *
 * Revision 1.22  1995/07/11  08:01:26  josullvn
 * Improved -clean option
 *
 * Revision 1.21  1995/07/11  06:58:50  josullvn
 * Added capabilities for inter-nanny communication.
 * Mainly, there is now a list of connections, and each new executable is
 * announced to the whole list - may need to work on the kill part...
 *
 * Revision 1.20  1995/07/11  01:06:34  josullvn
 * Whesh. OK, fixed a bug in parsing lines, where by if messages get corrupted,
 * we can recover. Added a -clean option to nanny. Discovered that the old
 * double newProcess bug has returned to haunt me.
 *
 * Revision 1.19  1995/07/09  05:25:11  josullvn
 * Added TCA test suite to Simulator.rc - helps with debugging
 * Fixed a display inheritance bug that interfere with dependancies starting up.
 * Fixed a problem with parsing data.
 * Improved communication feedbacks to runConsole from nanny.
 *
 * Revision 1.18  1995/07/04  05:46:30  josullvn
 * The latest update. Some stuff vanished (???), but recovered a bit.
 * Now have improved notification of whats happening between runConsole
 * and nanny, and better starting off of processes.
 *
 * Revision 1.17  1995/06/25  01:03:51  rich
 * Moved devUtils to tca.
 *
 * Revision 1.16  1995/06/15  22:11:07  rich
 * Linux 1.2 changes. Still does not compile.
 *
 * Revision 1.14  1995/06/02  23:02:52  rich
 * Added support for OSF 2.
 *
 * Revision 1.13  1995/05/24  15:50:46  josullvn
 * Had deleted two lines by accident in nannyUtils which prevented commands being
 * passed to processes. More support for intermachine communication. Now
 * can do rcsh to run a csh shell remotely on heart (according to new
 * Simulator.rc file)
 *
 * Revision 1.12  1995/05/23  23:58:40  josullvn
 * Yeah. Fixed the SIGCHLD problem (tured out that popen is internally
 * implemented with fork - when pclose was called, it generated a SIGCHLD
 * which intereupted the system call which lead to trouble).
 * Fixed environment variables, can now add env variables in .rc file, and
 * they are passed to appropriate child. Realized that need to also
 * provide an ability to pass the display variable from runConsole to the
 * nanny - being worked on.
 * Bug exists in devUtils (?) which large data streams. Run csh as a new process
 * and do ps auxww to cause it to occur.
 *
 * Revision 1.11  1995/05/20  02:07:34  josullvn
 * Some more bells and whistles to the console callback.
 * Added environment variables to the resource definition.
 * Extended the still buggy children catching for time outs.
 * Added diagnositic code to analyze startup failures.
 *
 * Revision 1.10  1995/05/19  11:32:28  josullvn
 * Updated resource files to include "ready strings"
 * Debugged some protocol problems with messages.c parsing of split lines.
 * (Feel that remaining problem is in devUtils).
 * Added Highlightening to buttons.
 * Removed SIGCHLD trapping - its too flakey at the moment.
 *
 * Revision 1.9  1995/05/17  23:39:34  josullvn
 * Now restarts dead processes - need to add timeouts.
 *
 * Revision 1.8  1995/05/15  17:16:51  rich
 * Added nanny.h and moved external declarations to the .h files.
 *
 * Revision 1.7  1995/05/15  17:07:20  rich
 * Updated interface to createLineBuffer so you can get partial lines and
 * the delimit character is not replaced.
 * Imporved layout of the X11 window of the console.
 *
 *****************************************************************************/

#include "tca/libc.h"
#include "tca/devUtils.h"
#include "tca/stdinDev.h"

#include "nannyUtils.h"
#include "nannyDev.h"
#include "nanny.h"
#include "resource.h"
#include "messages.h"

/*
 * Timing constants for dependency scheduling.
 *   SUB_TIMEOUT      - seconds between wishlist polls (used as tv_sec of the
 *                      polling interval); also the period, counted in polls,
 *                      at which "not ready" status lines are printed.
 *   MAX_TIMEOUT_TRYS - number of unsuccessful polls tolerated before
 *                      nannyCheckWishlist gives up on a program.
 *   MAX_TIMEOUT, EACH_TIMEOUT
 *                    - NOTE(review): no use is visible in this part of the
 *                      file; confirm before removing.
 */
#define MAX_TIMEOUT       60
#define SUB_TIMEOUT       3

#define MAX_TIMEOUT_TRYS  60
#define EACH_TIMEOUT      1

/* Index into rcAvailableProgramList of the program whose readiness is
 * currently being polled by nannyCheckWishlist, or -1 when none is. */
int currentlyWaitingForProgram = -1;

/* Report a fixed message. */
#define NWARNING1(x) nannyReportMessage(x)
/* Format a message taking exactly one argument and report it.  The text is
 * built in a 256-byte scratch buffer; snprintf truncates instead of writing
 * past it when the argument (a program name, a ps line, ...) is long.
 * The expansion is a brace block, as before, so existing call sites keep
 * compiling unchanged. */
#define NWARNING2(x,y) {char w[256]; snprintf(w,sizeof(w),x,y); nannyReportMessage(w);}

/*
 * Global variables
 */
int             numConnects = 0;
connectInfoPtr  NotedConnectsList = NULL;

/* Forward declarations for functions used before their definition. */
void nannyCheckWishlist(Pointer ignore1, Pointer ignore2);
BOOLEAN addpid(int *foundpids, int *nofound, int pid);
static int openParentPseudoTerminal(void);
static int openChildPseudoTerminal (int parentTTY);


/*****************************************************************************
 * !!Checking processes (ps aux stuff)
 *****************************************************************************/

/* DESCRIPTION:
 *     Reports whether a process snapshot describes a zombie.
 *
 * INPUTS:
 *    p: process info whose STATE field holds the STAT column text
 *       (as filled in by nannyPs)
 *
 * OUTPUTS:
 *    Non-zero when the first STATE character is 'Z', zero otherwise.
 */
static BOOLEAN psZombiep(processInfoPtr p)
{
  char stateCode;

  DEBUG1("static BOOLEAN psZombiep(processInfoPtr p)\n");
  stateCode = p->STATE[0];
  return (stateCode == 'Z');
}

/* DESCRIPTION:
 *     Reports whether a process snapshot describes a runnable process.
 *     Runnable means the process is running, or is sitting in the operating
 *     system's run/sleep queue: STAT codes 'R' and 'S' respectively.
 *
 * INPUTS:
 *    p: process info whose STATE field holds the STAT column text
 *       (as filled in by nannyPs)
 *
 * OUTPUTS:
 *    Non-zero when the first STATE character is 'S' or 'R', zero otherwise.
 */
static BOOLEAN psRunnablep(processInfoPtr p)
{
  char stateCode;

  DEBUG1("static BOOLEAN psRunnablep(processInfoPtr p)\n");
  stateCode = p->STATE[0];
  return ((stateCode == 'R') || (stateCode == 'S'));
}

/* DESCRIPTION:
 *   Reads one line of text from fp, intended for the output of a popen'd ps.
 *   The newline is consumed but not stored.  The result is always
 *   NUL-terminated and never occupies more than max_chars bytes of buff
 *   (terminator included).  A line that does not fit is truncated and the
 *   remainder of that line is discarded, so the next call starts at the next
 *   real line instead of in the middle of this one.
 *
 * INPUTS:
 *   buff: destination buffer, at least max_chars bytes long
 *   max_chars: capacity of buff in bytes, including the terminating NUL
 *   fp: stream being read (typically from popen)
 *
 * OUTPUTS:
 *   buff when a complete line (terminated by a newline) was read.
 *   NULL when end-of-file is reached before a newline; buff is still
 *   NUL-terminated in that case.  NULL is also returned, without reading,
 *   when buff or fp is NULL or max_chars is less than 1.
 */
static char *psGetLine(char *buff, int max_chars, FILE *fp)
{
  int stored = 0;
  int c;

  if (buff == NULL || fp == NULL || max_chars < 1)
    return NULL;

  buff[0] = '\0';
  for (;;) {
    c = getc(fp);
    if (c == EOF) {
      buff[stored] = '\0';
      return NULL;
    }
    if (c == '\n')
      break;
    /* Keep one byte in reserve for the terminator; characters that do not
     * fit are read and dropped so the stream stays line-aligned. */
    if (stored < max_chars - 1)
      buff[stored++] = (char) c;
  }
  buff[stored] = '\0';
  return buff;
}

/* DESCRIPTION:
 *  ps prints its columns with fixed widths; this copies one such column out
 *  of a line.  Copying stops early at the end of the line, so a short line
 *  yields a short (possibly empty) key rather than bytes from beyond the
 *  terminator.
 *
 * INPUTS:
 *  buf: NUL-terminated text positioned at the start of the column
 *  key: receives the column text, NUL-terminated; must have room for
 *       len + 1 bytes
 *  len: width of the column in characters
 *
 * OUTPUTS:
 *  pointer to the text following the column, or to the line's terminating
 *  NUL if the line ended inside the column
 */
static char *psExtractKey(char *buf, char *key, int len)
{
  int i;

  for (i = 0; i < len && *buf != '\0'; i++)
    key[i] = *(buf++);
  key[i] = '\0';
  return buf;
}

/* DESCRIPTION:
 *  Runs "ps auwx<pid>" through popen and fills the processInfo structure
 *  with the %CPU, %MEM, size and STAT columns of the reported process.
 *  The result can be queried with psRunnablep, psZombiep, etc.
 *
 *  SIGCHLD is ignored for the duration of the call and the previous
 *  disposition restored afterwards, because popen/pclose create and reap a
 *  child of their own.
 *
 * INPUTS:
 *  pid: process id to be searched for
 *  p:   structure to be filled; p->STATE must have room for 5 bytes
 *
 * OUTPUTS:
 *  TRUE if ps could be run and produced a header line plus a data line long
 *  enough to contain all the columns read here; FALSE otherwise (p may be
 *  left untouched).
 */
BOOLEAN nannyPs(int pid, processInfoPtr p)
{
  /* Fixed column widths of the "ps aux" layout this parser assumes. */
#if defined(linux)
  enum { RSS_TTY_WIDTH = 10 };
#else
  enum { RSS_TTY_WIDTH = 9 };
#endif
  enum { USER_WIDTH = 8, PID_WIDTH = 6, CPU_WIDTH = 5, MEM_WIDTH = 5,
	 SZ_WIDTH = 5, STATE_WIDTH = 4,
	 STATE_END = USER_WIDTH + PID_WIDTH + CPU_WIDTH + MEM_WIDTH +
		     SZ_WIDTH + RSS_TTY_WIDTH + STATE_WIDTH };
  FILE *fd;
  char command[120];
  char *lptr, line[133]; /* this is the length of the -w option gives us */
  char field[16];        /* widest column copied here is RSS_TTY_WIDTH + NUL */
  void (*handler)(int);  /* need to catch sigchld, as popen will trigger one */
  BOOLEAN found = FALSE;
  
  DEBUG1("BOOLEAN nannyPs(int pid, processInfoPtr p)\n");
  /* Don't want to know about errors ... */
  sprintf(command, "ps auwx%d 2> /dev/null", pid);
  /* Zero the whole buffer so the line is terminated even if the reader
   * fills all 132 characters. */
  memset(line, 0, sizeof(line));
  
  handler = signal(SIGCHLD, SIG_IGN);
  
  if ((fd = popen(command, "r"))==NULL) {
    NWARNING1("nannyPs: Unable to fork popen.\n");
    signal(SIGCHLD, handler);
    return FALSE;
  }
  
  /* First line is the ps header, second line describes the process. */
  if ((psGetLine(line, 132, fd)==NULL) ||
      (psGetLine(line, 132, fd)==NULL)) {
    pclose(fd);
    NWARNING1("nannyPs: ps returned empty.\n");
  } else {
    pclose(fd);
    if (strlen(line) < (size_t) STATE_END) {
      /* Not a full ps record; parsing it would read past the line. */
      NWARNING1("nannyPs: ps output too short to parse.\n");
    } else {
      lptr = psExtractKey(line, field, USER_WIDTH);      /* USER */
      lptr = psExtractKey(lptr, field, PID_WIDTH);       /* PID */
      lptr = psExtractKey(lptr, field, CPU_WIDTH);       /* %CPU */
      p->CPU = atof(field);
      lptr = psExtractKey(lptr, field, MEM_WIDTH);       /* %MEM */
      p->MEM = atof(field);
      lptr = psExtractKey(lptr, field, SZ_WIDTH);        /* SZ */
      p->SZ = atoi(field);
      lptr = psExtractKey(lptr, field, RSS_TTY_WIDTH);   /* RSS & TTY */
      lptr = psExtractKey(lptr, p->STATE, STATE_WIDTH);  /* STAT */
      /* The START and TIME columns that follow are not needed. */
      found = TRUE;
    }
  }
  
  signal(SIGCHLD, handler);
  return found;
}


/*****************************************************************************
 * !!Killing of processes
 *****************************************************************************/

/* DESCRIPTION:
 *  Kills a program that runs on this machine: marks it as not executing,
 *  sends SIGKILL to its recorded pid, unregisters and closes its file
 *  descriptors, resets its buffer/timeout bookkeeping and broadcasts a
 *  "dead" message for it.
 *
 * INPUTS:
 *  r: resource ptr to the program; r->pid and the r->fdstd* descriptors are
 *     expected to be those recorded when the program was started
 *
 * OUTPUTS:
 *  None.
 */
void nannyKillLocalProgram(rcProgramPtr r)
{
  char buffer[DEFAULT_LINE_LENGTH];

  DEBUG1("void nannyKillLocalProgram(rcProgramPtr r)\n");
  r->executing = FALSE;
      
  /* kill() treats pid 0 as "my own process group" and negative pids as
   * process groups / every process, so never pass those through: a program
   * whose start-up failed must not take the nanny down with it.
   */
  if (r->pid > 0) {
    kill(r->pid, SIGKILL);
  } else {
    NWARNING2("nannyKillLocalProgram: no valid pid recorded for %s\n",r->name);
  }

  /* This should be taken care of automatically by nannyDev, but
   * best to make sure here.
   */
  if (!Nanny_removeFd(r->fdstdout)) {
    NWARNING2("nannyKillLocalProgram: r->fdstout already deleted for %s\n",r->name);
  }
#if 0
  /* These were found to be cleaned up previously... */
  if (!Nanny_removeFd(r->fdstdin))
    NWARNING2("nannyKillLocalProgram: r->fdstin already deleted for %s\n",r->name);
  if (!Nanny_removeFd(r->fdstderr))
    NWARNING2("nannyKillLocalProgram: r->fdsterr already deleted for %s\n",r->name);
#endif

  /* Close messages from this process and reset
   * the message buffers for stdin and error 
   */
  close(r->fdstdin);
  close(r->fdstderr);
  close(r->fdstdout);
  r->ibufEnd = 0;
  r->ebufEnd = 0;
  r->timeouts = 0;
      
  messageMakeDead(buffer, r->name);
  nannyReportDualMessage(buffer);
}

/* DESCRIPTION:
 *  Kills a program by name.  A program executing on this machine is killed
 *  directly (see nannyKillLocalProgram); for a program executing elsewhere
 *  a kill message is passed on to its remote machine.
 *
 * INPUTS:
 *  name: name of the program, as known to the resource list
 *
 * OUTPUTS:
 *  FALSE if the name is unknown, or if the kill was only forwarded to a
 *  remote machine (so we cannot yet be certain the program is dead).
 *  TRUE if the program was not executing, or was killed locally.
 */
BOOLEAN nannyKillProgram(char *name)
{
  char killMessage[DEFAULT_LINE_LENGTH];
  rcProgramPtr prog;
  
  DEBUG1("BOOLEAN nannyKillProgram(char *name)\n"); 
  if (rcLookupName(name) == -1) {
    NWARNING2("nannyKillProgram: Attempting to kill non-existent %s\n", name);
    return FALSE;
  }
  
  prog = &(rcAvailableProgramList[rcLookupName(name)]);
  
  /* Nothing to do for a program that is not running. */
  if (!prog->executing)
    return TRUE;
  
  if (prog->local) {
    nannyKillLocalProgram(prog);
    return TRUE;
  }
  
  /* Remote program: ask its machine to do the killing. */
  messageMakeKill(killMessage, name);
  robustPassMessage(prog->remoteMachine, killMessage);
  return FALSE; /* Think negative until told otherwise */
}

/* DESCRIPTION:
 *   Seeks out child processes which have the same parent as name, and kills
 *   them. 
 *
 * INPUTS:
 *  
 *
 * OUTPUTS:
 */
BOOLEAN nannyDestroyProgram(char *name)
{
  FILE *fd;
  rcProgramPtr r;
  char *lptr, line[143]; /* this is what the -w option gives us */
  char prog[10],pid[10], dummy[20];
  int  foundpids[20], nofound=0;
  char buffer[DEFAULT_LINE_LENGTH];
  void (*handler)(void); /* need to catch sigchld, as popen will trigger one */
  
  DEBUG1("BOOLEAN nannyDestroyProgram(char *name)\n");
  if (rcLookupName(name) == -1)
    {
      NWARNING2("nannyDestroyProgram: Attempting to destroy parents of non-existent %s\n",
		name);
      return FALSE;
    }
  
  r = &(rcAvailableProgramList[rcLookupName(name)]);
  
  if (r->local) {
    /* This means that it won't be restarted until 
     * an explicit restart is requested.
     */
    r->timeouts = r->max_restarts; 

    sprintf(buffer, "ps auwx | egrep %s", getenv("USER"));
    
    handler = signal(SIGCHLD, SIG_IGN);
  
    if ((fd = popen(buffer, "r"))==NULL)  {
      NWARNING1("nannyDestroy: Unable to fork popen.\n");
      signal(SIGCHLD, handler);
      return FALSE;
    }
    
    if (psGetLine(line, 132, fd)==NULL) {
      pclose(fd);
      NWARNING1("nannyDestroy: ps returned empty.\n");
      signal(SIGCHLD, handler);
      return FALSE;
    }

    while (!feof(fd) && psGetLine(line, 132, fd)) {
      lptr = psExtractKey(line, prog, 8);   
      lptr = psExtractKey(lptr, pid, 6);        /* PID */
      lptr = psExtractKey(lptr, dummy, 5);      /* %CPU */
      lptr = psExtractKey(lptr, dummy, 5);      /* %MEM */
      lptr = psExtractKey(lptr, dummy, 5);      /* %SS */
#if defined(linux)
      lptr = psExtractKey(lptr, dummy, 10);      /* RSS & TTY */
#else
      lptr = psExtractKey(lptr, dummy, 9);      /* RSS & TT */
#endif
      lptr = psExtractKey(lptr, dummy, 4);       /* STAT */
      lptr = psExtractKey(lptr, dummy, 13);      /* START & CPU */
      /* program running */
      /* Slight variablilty between Linux and Sunos */
      lptr = psExtractKey(lptr, dummy, strlen(r->executable)+4);
      
      /* 
       * Note that we really only care about programs run from nanny, 
       * and these should exactly match r->executable
       * Hmm, unix sometimes throws weird stuff in front, so back to 
       * partial matching.
       */
      if (strstr(dummy,r->executable) != NULL) {
	if (addpid(foundpids,&nofound,atoi(pid))) {
	  int killPid = atoi(pid);
	  
	  NWARNING1("Trying to kill...\n");
	  
	  kill (killPid, 9);
	  {
	    int status;
	    
	    waitpid(killPid, &status, FALSE);
	  }
	}
      }
    }
    pclose(fd);
    signal(SIGCHLD, handler);
  } else {
    messageMakeDestroy(buffer, name);
    robustPassMessage(r->remoteMachine, buffer);
    return FALSE; /* Think negative until told otherwise */
  }
  
  return TRUE;
}

/*****************************************************************************
 * !!Utility functions for running of processes
 *****************************************************************************/

/* DESCRIPTION:
 *   A list of arguments from the resource file is massaged to 
 *   be suitable for passing to exec: arg0 first, then the n resource-file
 *   arguments, then a terminating NULL.
 *
 * INPUTS:
 *   arg0 is the name of the program
 *   n    is the number of arguments in argrest; a negative count, or a NULL
 *        argrest, is treated as "no arguments"
 *   argrest is the rest of arguments, from the resource file
 *
 * OUTPUTS:
 *   a ptr to a newly malloc'ed, NULL-terminated arg list suitable for
 *   passing to exec, or NULL if the allocation failed.  Only the array is
 *   allocated; the strings are shared with the caller.  The caller owns the
 *   array.
 */
static char **makeArgs(char *arg0, int n, char **argrest)
{
  char **args;
  int  i;

  if (n < 0 || argrest == NULL)
    n = 0;

  args = malloc (((size_t) n + 2) * sizeof *args);
  if (args == NULL)
    return NULL;

  args[0] = arg0;
  for (i=0; i<n; i++)
    args[i+1] = argrest[i];
  args[n+1] = NULL;
  
  return args;
}

/* DESCRIPTION:
 *  exec complains if there are duplicates in the environment list.
 *  This tests whether the variable named by testThis is also defined in
 *  envs.  Names are compared in full, so "PATH=..." does not conflict with
 *  "PATHEXT=...".
 *
 * INPUTS:
 *  testThis: an env variable of the form NAME=value (may be NULL).
 *  envs: a NULL-terminated list of env variables (may be NULL).
 * 
 * OUTPUTS:
 *  TRUE if a variable with the same NAME as testThis is found in envs.
 *  FALSE otherwise, including when testThis is NULL or has an empty name.
 */
static BOOLEAN conflictingEnv(char *testThis, char **envs)
{
  size_t nameLen;
  int  i;
  
  if (testThis == NULL || envs == NULL) return FALSE;
  nameLen = strcspn(testThis,"=");
  if (nameLen<1) return FALSE;
  
  for (i=0; envs[i]!=NULL; i++) {
    /* The first nameLen characters match, so envs[i] is at least nameLen
     * long and envs[i][nameLen] is a valid read; the name only matches if
     * it ends right there. */
    if (strncmp(testThis, envs[i], nameLen) == 0 &&
	(envs[i][nameLen] == '=' || envs[i][nameLen] == '\0'))
      return TRUE;
  }
  
  return FALSE;
}

/* DESCRIPTION:
 *   We append our new environment variables to the current environment list
 *   (the file-level `environment` array).  Inherited entries that envs
 *   redefines are left out, since we have to ensure there are no duplicates;
 *   otherwise the lowlevel c runtime initialization will complain.
 *
 * INPUTS:
 *   n    is the number of new env variables; a negative count, or a NULL
 *        envs, is treated as "no new variables"
 *   envs is the list of these, from the resource file; it is also scanned
 *        as a NULL-terminated list when looking for conflicts
 *
 * OUTPUTS:
 *   a ptr to a newly malloc'ed, NULL-terminated env list suitable for
 *   passing to exec, or NULL if the allocation failed.  Only the array is
 *   allocated; the strings are shared.  The caller owns the array.
 */
static char **makeEnvs(int n, char **envs)
{
  char **lenviron;
  int  i, index, size;

  if (n < 0 || envs == NULL)
    n = 0;
  
  for (size=0; environment[size]!=NULL; size++)
    ;

  /* size + n + 1 is an upper bound: conflicting inherited entries are
   * dropped below, which can only make the list shorter. */
  lenviron = malloc (((size_t) size + (size_t) n + 1) * sizeof *lenviron);
  if (lenviron == NULL)
    return NULL;

  for (i=0, index=0; i<size; i++)
    if (envs == NULL || !conflictingEnv(environment[i], envs))
      lenviron[index++] = environment[i];
  for (i=0; i<n; i++) 
    lenviron[index++] = envs[i];
  lenviron[index] = NULL;
  
  return lenviron; 
}

/* DESCRIPTION:
 *  Checks whether a program's "ready string" has shown up in the text
 *  buffered in r->stdinBuffer.
 *  
 * INPUTS:
 *  r: resource ptr to the program
 *
 * OUTPUTS:
 *  TRUE if the program has no ready string; otherwise non-zero exactly when
 *  the ready string occurs somewhere in r->stdinBuffer.
 */
static BOOLEAN readyStringSatisfied(rcProgramPtr r)
{
  DEBUG1("static BOOLEAN readyStringSatisfied(rcProgramPtr r)\n");
  if (r->ready_string != NULL)
    return (strstr(r->stdinBuffer,r->ready_string) != NULL);
  
  /* Nothing to wait for. */
  return TRUE;
}


/*****************************************************************************
 * !!Scheduling/Running of processes
 *****************************************************************************/

/* DESCRIPTION:
 *   Try and figure out why "name" can't start 
 *   Main assumption is that a program won't run if it is already running (probably
 *   due to someone else running it, or a broken nanny).  The full process
 *   list ("ps auwx") is scanned for the first line mentioning the program's
 *   executable; if one is found, its owner and pid are reported together
 *   with a suggested kill command.
 * 
 * INPUTS:
 *   name: a program name, as known to the resource list.
 *
 * OUTPUTS:
 *   None; findings are reported through NWARNING1/NWARNING2.
 */
static void nannyDiagnozeNonExec(char *name)
{
  rcProgramPtr r;
  FILE *fd;
  char *lptr, line[133]; /* this is what the -w option gives us */
  char field[10];        /* holds the 8-wide USER and 6-wide PID columns */
  void (*handler)(int);
  int index;
  
  DEBUG1("static void nannyDiagnozeNonExec(char *name)\n");
  index = rcLookupName(name);
  if (index == -1) {
    NWARNING2("nannyDiagnozeNonExec: Unknown program %s\n", name);
    return;
  }
  r = &(rcAvailableProgramList[index]);

  /* Zero the buffer so a line that fills all 132 characters is still
   * terminated. */
  memset(line, 0, sizeof(line));
  
  /* popen/pclose create and reap a child of their own; keep that from
   * disturbing our SIGCHLD handling. */
  handler = signal(SIGCHLD, SIG_IGN);
  
  if ((fd = popen("ps auwx", "r"))==NULL) {
    NWARNING1("nannyDiagnozeNonExec: Unable to fork popen.\n");
    signal(SIGCHLD, handler);
    return;
  }
  
  /* Skip the ps header line. */
  if (psGetLine(line, 132, fd)==NULL) {
    pclose(fd);
    NWARNING1("nannyDiagnozeNonExec: ps returned empty\n");
    signal(SIGCHLD, handler);
    return; 
  }
  
  while (!feof(fd) && psGetLine(line, 132, fd)) {
    if (strstr(line,r->executable) == NULL)
      continue;

    NWARNING2("%s already seems to be running\n", name);
    NWARNING2("**%s**\n", line);
    lptr = psExtractKey(line, field, 8);      /* USER */
    NWARNING2("Tell %s ", field);
    lptr = psExtractKey(lptr, field, 6);      /* PID */
    NWARNING2("to do \"kill -9 %s\".\n", field);
    break;                                    /* first match is enough */
  }

  pclose(fd);
  signal(SIGCHLD, handler);
}

/* DESCRIPTION:
 *   We have realized that name can't execute yet, since 
 *   it depends on other processes to be running.  So flag the program as
 *   wanting to execute and, unless a readiness poll is already in progress,
 *   start polling nannyCheckWishlist every SUB_TIMEOUT seconds.
 *
 * INPUTS:
 *   name: program to be queued; must be known to the resource list (the
 *         lookup result is not checked here)
 *
 * OUTPUTS:
 *   None.
 */
static void nannyAddToExecutingWishlist(char *name)
{
  struct timeval pollInterval = {SUB_TIMEOUT,0};
  rcProgramPtr prog;
  
  DEBUG1("static void nannyAddToExecutingWishlist(char *name)\n");
  NWARNING2("Dependencies required for: %s\n", name);

  prog = &(rcAvailableProgramList[rcLookupName(name)]);
  prog->wantToExecute = TRUE;
  
  /* A poll is already running when some program is being waited for. */
  if (currentlyWaitingForProgram != -1)
    return;

  /* NOTE(review): pollInterval is a local; this assumes devStartPolling
   * copies the timeout rather than keeping the pointer - confirm. */
  devStartPolling(stdin_device, &pollInterval, nannyCheckWishlist,NULL);
}

/* DESCRIPTION:
 *   Test to see if a program that we are waiting for is ready.
 *   A local program is ready when ps can find it, ps reports it as
 *   runnable, and its ready flag (r->ready) is set.  A remote program is
 *   ready when it is marked as executing.
 *
 * INPUTS:
 *   r: resource pointer to the program we are testing
 *   print: We may do this several times; status messages are only printed
 *          when this is TRUE (the caller decides how often)
 *
 * OUTPUTS:
 *   TRUE if program is ready to run, and dependencies can be fired.
 */
static BOOLEAN nannyProgramReadyp(rcProgramPtr r, BOOLEAN print)
{  
  processInfoType snapshot;
  
  DEBUG1("static BOOLEAN nannyProgramReadyp(rcProgramPtr r, BOOLEAN print)\n");

  /* Remote programs: this will be set by a remote announcement */
  if (!r->local)
    return r->executing;

  /* 
   * Don't want nannyPs to be called excessively 
   * - the runnable flag has been added to reduce 
   * such overhead
   */
  if (!nannyPs(r->pid, &snapshot))
    return FALSE;

  if (!psRunnablep(&snapshot)) {
    if (print) {
      NWARNING2("%s not yet executing.\n", r->name);
    }
    return FALSE;
  }

  if (!r->ready && print) {
    NWARNING2("%s executing, but not yet ready.\n", r->name);
  }
  return r->ready;
}

/* DESCRIPTION:
 *   This is a handler called by devUtils when programs are waiting
 *   for dependencies to execute. It works in two phases:
 *
 *   1) If a program is currently being waited for
 *      (currentlyWaitingForProgram != -1), check whether it has become
 *      ready.  If so, mark it as executing and announce it; otherwise count
 *      one more retry, and give up on it once more than MAX_TIMEOUT_TRYS
 *      retries have been used.
 *
 *   2) If nothing is being waited for (any more), look for a program that
 *      wants to execute but is not executing.  If there is none, polling is
 *      stopped.  Otherwise either one of its outstanding dependencies, or
 *      the program itself, is started through nannyExecProgram.
 *
 * INPUTS:
 *   devUtils will fill two pointers, which we ignore. 
 *
 * BUGS: 
 *   RETRIES is shared among all programs - there should be 
 *   an individual retry count for each process.
 */
void nannyCheckWishlist(Pointer ignore1, Pointer ignore2)
{
  rcProgramPtr r;
  static int RETRIES = 0;  /* polls spent on the program being waited for */
  int i;
  BOOLEAN dependent[rcNumAvailablePrograms], conflict[rcNumAvailablePrograms];
  
  DEBUG1("void nannyCheckWishlist(Pointer ignore1, Pointer ignore2)\n");
  if (currentlyWaitingForProgram != -1) {
    r = &(rcAvailableProgramList[currentlyWaitingForProgram]);
    
    /* Only let the readiness check print on every SUB_TIMEOUT'th retry. */
    if (nannyProgramReadyp(r, ((RETRIES%SUB_TIMEOUT)==0))) {
      char  ack[MAXBUF]; 
      
      NWARNING2("%s executing, and ready.\n", r->name);
      currentlyWaitingForProgram = -1;
      r->executing = TRUE;
      r->wantToExecute = FALSE;
      
      messageMakeAnnounce(ack, r->name, r->ready);
      nannyReportDualMessage(ack);
    }
    else
      RETRIES++;
    
    /* Waited long enough: report a likely cause and stop waiting for it. */
    if (RETRIES > MAX_TIMEOUT_TRYS) {
      nannyDiagnozeNonExec(r->name);
      r->wantToExecute = FALSE;
      currentlyWaitingForProgram = -1;
    }
  }
  
  if (currentlyWaitingForProgram == -1) {
    /*
     * If we aren't waiting for the report of a ready program, 
     * schedule a new one 
     */
    /* There is no break, so the last matching entry in the list wins. */
    for (i=0; i<rcNumAvailablePrograms; i++) {
      r = &(rcAvailableProgramList[i]);
      if ((r->wantToExecute) && (!r->executing)) {
	currentlyWaitingForProgram = i;
      }
    }

    if (currentlyWaitingForProgram == -1) /* Nothing to Execute */
      devStopPolling(stdin_device);
    else {
      int candidate;
      
      r = &(rcAvailableProgramList[currentlyWaitingForProgram]);
      rcGenerateDependencies(r->name,dependent,conflict);
      
      /* presumably the index of a dependency that is not yet executing, or
       * -1 when there is none - verify against rcExistDepOrCon */
      candidate = rcExistDepOrCon(dependent,FALSE); 
      if (candidate != -1) {
	char  displayenv[DEFAULT_LINE_LENGTH]; 
	char  display[DEFAULT_LINE_LENGTH]; 
	
	/* Start (and wait for) the dependency instead of the program. */
	currentlyWaitingForProgram = candidate;
	/* It inherits the display of its creator */
	sprintf(displayenv, "DISPLAY=%s", r->display);
	strcpy(display, r->display);
	r = &(rcAvailableProgramList[currentlyWaitingForProgram]);
	rcInsertStr(rcLookupName(r->name), rcEntENVS, displayenv);
	rcInsertStr(rcLookupName(r->name), rcEntDISPLAY, display);
      } else if (!rcDependsOnProcess(r)) {
	/* Don't block waiting for this process if nothing depends on it */
	currentlyWaitingForProgram = -1;
      }
      
      /* r is now either the dependency picked above or the wanted program
       * itself; either way it gets a fresh retry budget and is started. */
      RETRIES = 0;
      NWARNING2("Scheduling: %s\n", r->name);
      nannyExecProgram(r->name);
    }
  }
  
  return;
}

/* Path of the pseudo-terminal device most recently opened.  It is written by
 * openParentPseudoTerminal ("/dev/ptyXY") and patched in place by
 * openChildPseudoTerminal to name the matching "/dev/ttyXY" device.
 */
static char terminalName[16];

/* DESCRIPTION:
 *   Starts program r on this machine.  The child gets a pseudo-terminal as
 *   stdout, a pipe as stdin and a second pipe as stderr, changes to the
 *   program's directory and execs the executable with the arguments and
 *   environment from the resource entry.  The parent records the pid and
 *   its ends of the three channels in r, registers them with Nanny_addFd
 *   and announces the program.
 *
 * INPUTS:
 *   r: resource ptr to the program to run
 *
 * OUTPUTS:
 *   TRUE if the child was forked.  FALSE if the pseudo-terminal, a pipe or
 *   the fork failed; in that case every descriptor opened here is closed
 *   again and r->executing is reset to FALSE, since nothing is running.
 */
static BOOLEAN nannyRunLocalProgram(rcProgramPtr r)
{
  int id[2], ed[2], processId, ptty, i;
  char **envs, **args;
  char  ack[MAXBUF];
  
  DEBUG1("static BOOLEAN nannyRunLocalProgram(rcProgramPtr r)\n");
  r->executing = TRUE; 
  r->wantToExecute = FALSE; 
  r->ready = FALSE;

  /* reids: Open a pseudo-terminal for stdin and stdout.  Use pseudo-terminal 
   *        to get the same interactivity (buffering) as a real terminal.
   *        Use a pipe for stderr, to differentiate between stdout and stderr 
   */
  ptty = openParentPseudoTerminal();
  if (ptty < 0) {
    fprintf(stderr, "CANNOT OPEN PSEUDO-TERMINAL\n"); fflush(stderr);
    r->executing = FALSE;
    return FALSE;
  }
  if (pipe(ed) == -1) {
    fprintf(stderr, "PIPE FAILURE\n"); fflush(stderr);  perror("pipe");
    close(ptty);
    r->executing = FALSE;
    return FALSE;
  }
  if (pipe(id) == -1) {
    fprintf(stderr, "PIPE FAILURE\n"); fflush(stderr);  perror("pipe");
    close(ed[0]); close(ed[1]); close(ptty);
    r->executing = FALSE;
    return FALSE;
  }
  switch (processId = fork()) {
  case -1:
    fprintf(stderr, "FORK FAILURE\n"); fflush(stderr);  perror("fork");
    close(ed[0]); close(ed[1]); close(id[0]); close(id[1]); close(ptty);
    r->executing = FALSE;
    return FALSE;
  case 0:			/* This is the child process executing */
    {
      ptty = openChildPseudoTerminal(ptty);
      dup2(ptty, fileno(stdout));
      /*
       * reids: For some reason, it does not work to have the ptty be connected
       *        to both stdin and stdout.  Don't know what I'm doing wrong 
       */
      dup2(id[0],fileno(stdin));
      dup2(ed[1],fileno(stderr));
      
      /* Close the rest of the open files */
      for (i=3; i<NOFILE; i++) {
	close(i); /* Ignoring errors merrily */
      }
      
      /* NOTE(review): exit() in a forked child runs the parent's atexit
       * handlers and flushes inherited stdio buffers; _exit() with a
       * non-zero status is the usual choice.  Left as exit(0) in case child
       * management relies on the status - confirm.
       */

      /* Move to the correct dir.. */
      if (chdir(r->pathToExecuteFrom)==-1) {
	NWARNING2("Chdir failed for %s. Check Resource File\n", r->name);
	exit(0);
      }
      args = makeArgs(r->executable, r->noargs, r->args);
      envs = makeEnvs(r->noenvs, r->env);
      if (args == NULL || envs == NULL) {
	/* Could not build the argument or environment list. */
	perror(r->name);
	exit(0);
      }
      
      execve (r->executable, args, envs);
      /* Only reached when the exec itself failed. */
      perror(r->name); 
      exit(0);
    }
  default:		/* This is the parent process executing */
    r->pid = processId;
    r->fdstdin  = id[1];
    r->fdstdout = ptty;
    r->fdstderr = ed[0];
    close(ed[1]);
    /* NOTE(review): id[0], the child's end of the stdin pipe, stays open in
     * the parent, which costs one descriptor per start.  Closing it would
     * make writes to a dead child raise SIGPIPE, so confirm how SIGPIPE is
     * handled before changing this.
     */
    DEBUG1("Adding new connection");
    DEBUG5(" %d %d %d %s\n",r->fdstdin,r->fdstdout,r->fdstderr, r->name);
    Nanny_addFd(r->fdstdin,r->fdstdout,r->fdstderr, r->name);

    /* A program without a ready string counts as ready straight away. */
    r->ready = (!r->ready_string);
    messageMakeAnnounce(ack, r->name, r->ready);
    nannyReportDualMessage(ack);
  }
  return TRUE;
}

/* DESCRIPTION:
 * Hmm - we can't spin inside the ProcessDevices loop like the
 * oldNannyExecProgram used do.  Instead, a program whose dependencies are
 * not yet satisfied is put on the executing wishlist, which a devUtils poll
 * (nannyCheckWishlist) works through.  A program without outstanding
 * dependencies has its conflicting programs killed and is then started:
 * directly when it is local, by passing an exec message to its remote
 * machine otherwise.
 *
 * INPUTS:
 *   name: name of the program, as known to the resource list
 *
 * OUTPUTS:
 *   FALSE if the name is unknown, or if starting a local program failed.
 *   TRUE otherwise: the program was already executing, was queued on the
 *   wishlist, was started locally, or an exec request was sent to its
 *   remote machine.
 */
BOOLEAN nannyExecProgram(char *name)
{
  rcProgramPtr prog;
  BOOLEAN dependent[rcNumAvailablePrograms],
  conflict[rcNumAvailablePrograms];
  char execMessage[DEFAULT_LINE_LENGTH];
  int victim;
  
  DEBUG1("BOOLEAN nannyExecProgram(char *name)\n");
  if (rcLookupName(name) == -1) return FALSE; 
  
  prog = &(rcAvailableProgramList[rcLookupName(name)]);
  
  /* Already running: nothing to do. */
  if (prog->executing)
    return TRUE;

  rcGenerateDependencies(name,dependent,conflict);
    
  /* Outstanding dependencies: queue it and let the poll handle it. */
  if (rcExistDepOrCon(dependent,FALSE) != -1) {
    nannyAddToExecutingWishlist(name);
    return TRUE;
  }

  /* NOTE(review): a remote conflict is only asked to die; if
   * rcExistDepOrCon keeps reporting it until the remote side answers, this
   * loop would not terminate - confirm. */
  for (victim = rcExistDepOrCon(conflict,TRUE);
       victim != -1;
       victim = rcExistDepOrCon(conflict,TRUE))
    nannyKillProgram(rcAvailableProgramList[victim].name);
      
  if (prog->local)
    return nannyRunLocalProgram(prog);

  prog->executing = FALSE; /* Think negative until told otherwise */
	
  messageMakeExec(execMessage, name, prog->display); 
  robustPassMessage(prog->remoteMachine, execMessage);
  return TRUE;
}


/*****************************************************************************
 * !!pseudoTerminals
 *****************************************************************************/
/* Device name of the pseudo-terminal most recently tried.
 * openParentPseudoTerminal() writes "/dev/ptyXY" here, and
 * openChildPseudoTerminal() overwrites character 5 with 't' so that the
 * same buffer then names "/dev/ttyXY". */
static char terminalName[16];

/* DESCRIPTION:
 *   Opens the "master" side of a pseudo-terminal.  Every device name
 *   /dev/ptyXY, with X taken from "pqrs" and Y from "0123456789abcdef",
 *   is tried in order until one can be opened read/write.  The name that
 *   was tried last is left in terminalName.
 *
 * INPUTS:
 *   none
 *
 * OUTPUTS:
 *   The descriptor of the first device that opens, or -1 if none does.
 */
static int openParentPseudoTerminal(void)
{
  static const char banks[] = "pqrs";
  static const char units[] = "0123456789abcdef";
  int bank, unit;

  DEBUG1("static int openParentPseudoTerminal(void)\n");

  /* reids: UNIX will not let us open the same terminal twice, so walk
   *        through all the possible names until one succeeds.
   */
  for (bank = 0; banks[bank] != '\0'; bank++) {
    for (unit = 0; units[unit] != '\0'; unit++) {
      int masterFd;

      sprintf(terminalName, "/dev/pty%c%c", banks[bank], units[unit]);
      masterFd = open(terminalName, O_RDWR);
      if (masterFd >= 0) {
        return masterFd;
      }
    }
  }

  return -1;
}

/* DESCRIPTION:
 *   Opens the tty side of the pseudo-terminal whose name is currently in
 *   terminalName, copies the terminal modes of stdout onto it (with ECHO
 *   and CRMOD cleared), detaches from the current controlling tty, and
 *   puts the calling process into a process group of its own which is
 *   also set as the process group of the new terminal.
 *
 *   NOTE(review): none of the ioctl()/open() results are checked.  If the
 *   TIOCGET* calls on stdout fail, the mode variables are passed on
 *   without having been filled in - confirm stdout is always a tty here.
 *
 * INPUTS:
 *   parentTTY - not referenced anywhere in the body.
 *
 * OUTPUTS:
 *   The descriptor returned by open() for the tty device; this is
 *   negative if the open failed.
 */
static int openChildPseudoTerminal (int parentTTY)
{
  int ptty, tty, pid;
  long ldisc, lmode;
  struct sgttyb sgttyb;
  struct tchars tchars;
  struct ltchars ltchars;

  DEBUG1("static int openChildPseudoTerminal (int parentTTY)\n");
  /* Get the modes of the current terminal, to use for the pseudo-terminal */
  ioctl(fileno(stdout), TIOCGETD, &ldisc);
  ioctl(fileno(stdout), TIOCLGET, &lmode);
  ioctl(fileno(stdout), TIOCGETP, &sgttyb);
  ioctl(fileno(stdout), TIOCGETC, &tchars);
  ioctl(fileno(stdout), TIOCGLTC, &ltchars);

  /* Clear the controlling tty, and make the pseudo-terminal the new
     controlling tty. */
  tty = open("/dev/tty", O_RDWR);
  if (tty >= 0) {
    ioctl(tty, TIOCNOTTY, 0);
    close(tty);
  }
  /* Turn "/dev/ptyXY" into "/dev/ttyXY": index 5 is the 'p' of "pty". */
  terminalName[5] = 't';
  ptty = open(terminalName, O_RDWR);

  /* Set the terminal modes.
     Don't set ECHO, since we don't want characters to be "typed" back
     Don't set CRMOD, since we don't want fancy EOL processing */
  sgttyb.sg_flags &= ~O_ECHO;
  sgttyb.sg_flags &= ~O_CRMOD;
  ioctl(ptty, TIOCSETD, &ldisc);
  ioctl(ptty, TIOCLSET, &lmode);
  ioctl(ptty, TIOCSETP, &sgttyb);
  ioctl(ptty, TIOCSETC, &tchars);
  ioctl(ptty, TIOCSLTC, &ltchars);

  /* Set the process group of the child process and the process group of
     the pseudo-terminal */
  pid = getpid();
#ifdef linux /* I have no idea if this is correct for linux machines. */
  setpgid(pid,pid);
#else
  setpgrp(pid, pid);
#endif
  ioctl(ptty, TIOCSPGRP, &pid);

  return ptty;
}


/*****************************************************************************
 * !!child management
 *****************************************************************************/
/* DESCRIPTION:
 *   The scenario - a child process is dead. What to do?
 *   Care has been taken that the only non caught child processes
 *   are those processes created by nannyRunLocal.
 *
 *   One child is collected with wait4().  If it exited or was killed by
 *   a signal, every program in rcAvailableProgramList that is marked both
 *   executing and local is examined; those found to be gone are marked
 *   as not executing and are put back on the executing wishlist until
 *   r->timeouts reaches r->max_restarts, after which nanny gives up on
 *   them and resets the counter.
 *
 * INPUTS:
 *   sig - signal fills this in with the signal which has been fired
 *         this can only be a sigchld, since I haven't trapped any others.
 *
 * OUTPUTS:
 *   none
 */
void nannyCatchChild(int sig)
{
  int i, pid;
  union wait status;
  
  DEBUG1("void nannyCatchChild(int sig)\n");
  /* Ignore the signal for the duration of this routine.
   * Must really be the first thing that happens here
   */
  signal(SIGCHLD, SIG_IGN);  
  
  if (sig != SIGCHLD) 
    NWARNING2("Unknown Signal %d received\n",sig);
  /* even if unknown, really no harm in continuing */
  
  /* Collect the process which caused the SIGCHLD.  WUNTRACED makes
   * stopped children reportable as well as terminated ones.
   * NOTE(review): the result is not checked; if wait4() fails, status
   * is examined below without having been filled in.
   */
  pid = wait4(0,&status, WUNTRACED, 0);
  DEBUG2("%d is dead\n",pid);

  /* Find out what happened */
  if (WIFSTOPPED(status)) { 
    DEBUG1("Notified of a sleeping child, hope the syscall is restartable...\n");
  } else {
    if ((WIFEXITED(status)) || (WIFSIGNALED(status))) {
      for (i=0; i<rcNumAvailablePrograms; i++) {
	processInfoType p;
	rcProgramPtr r = &(rcAvailableProgramList[i]);
	
	/* We need to check not only this killed process, but also, 
	 * all other running processes, in case they died too...
	 */	       
	/* A program counts as dead when it is marked executing and local
	 * and either its pid is the one just collected, or nannyPs()
	 * fails for its pid, or psRunnablep() says it is not runnable.
	 * (The original note here was cut off mid-sentence: "It happens
	 * that if something dies locally, yet a remote process tells us
	 * about it, we get ...".)
	 */
	if ((r->executing && r->local) &&
	    ((r->pid == pid) ||
	     (!nannyPs(r->pid, &p)) ||
	     (!psRunnablep(&p)))) {
#if 0
	  /*Do NOT close them here - let devUtils handle it with readNChar...*/
	  if (!Nanny_removeFd(r->fdstdin))
	    NWARNING2("nannyCatchChild: r->fdstin already deleted for %s\n",r->name);

	  close(r->fdstdout);
	  close(r->fdstdin);
	  close(r->fdstderr);

	  if (!Nanny_removeFd(r->fdstdout))
	    NWARNING2("nannyCatchChild: r->fdstout already deleted for %s\n",r->name);
	  if (!Nanny_removeFd(r->fdstderr))
	    NWARNING2("nannyCatchChild: r->fdsterr already deleted for %s\n",r->name);
#endif
	  r->executing = FALSE;
	  /* Restart through the wishlist rather than directly, up to
	   * max_restarts times; after that the counter starts over. */
	  if (r->timeouts < r->max_restarts) {
	    NWARNING2("Restarting %s\n",r->name);
	    r->timeouts++;
	    nannyAddToExecutingWishlist(r->name);
	    /* nannyExecProgram(r->name); */
	  } else {
	    NWARNING2("Giving up on %s\n",r->name);
	    r->timeouts = 0;
	  }
	}
      }
    }
    else
      DEBUG1("Something weird happened in SIGCHILD\n");
  }
  
  /* 
   * reset the signal so as to come back here again
   */
  signal (SIGCHLD, nannyCatchChild); 
}

/* DESCRIPTION:
 *   Kills every program in rcAvailableProgramList that is marked as
 *   executing, and then calls devShutdown().
 *
 * INPUTS:
 *   none
 *
 * OUTPUTS:
 *   none
 */
void nannyShutdown(void)
{
  int idx = 0;

  DEBUG1("void nannyShutdown(void)\n");

  while (idx < rcNumAvailablePrograms) {
    rcProgramPtr prog = &(rcAvailableProgramList[idx]);

    if (prog->executing) {
      nannyKillProgram(prog->name);
    }
    idx++;
  }

  devShutdown();
}

/* DESCRIPTION:
 *   Reports a message to everything nanny is connected to.  When at
 *   least one connection is noted, the message is wrapped with
 *   messageMakeInfo() together with this machine's short host name and
 *   passed to every noted connection (remote machines and local
 *   consoles alike); it is also written to the debug output.  When no
 *   connection is noted the message goes to stderr as is.
 *
 *   The host name is looked up on the first call only and is cut at the
 *   first '.' so that just the unqualified name is reported.
 *
 * INPUTS:
 *   message - text to report.  It is always printed as data, never
 *             interpreted as a printf format.
 *
 * OUTPUTS:
 *   none
 */
void nannyReportMessage(char *message)
{
  static char thisHost[80];
  static int hostKnown = 0;

  DEBUG1("void nannyReportMessage(char *message)\n");
  if (!hostKnown) {
    int i;

    if (gethostname(thisHost, sizeof(thisHost)) != 0)
      thisHost[0] = '\0';
    /* gethostname() need not terminate a truncated name. */
    thisHost[sizeof(thisHost) - 1] = '\0';

    /* Keep only the part in front of the first dot. */
    for (i = 0; thisHost[i] != '\0'; i++) {
      if (thisHost[i] == '.') {
        thisHost[i] = '\0';
        break;
      }
    }
    hostKnown = 1;
  }

  if (numConnects) {
    int i;
    connectInfoPtr c;
    char buf[MAXBUF];

    messageMakeInfo(buf, thisHost , TRUE, message);
    /* Count down because a failed send removes the entry from the list. */
    for (i=numConnects-1; i>=0; i--) {
      c = &NotedConnectsList[i];

      if (c->remote)
        robustPassMessage(c->machine, buf);
      else
        robustPassLocalReply(c->fd, buf);
    }
    DEBUG2("%s", message);
  } else {
    fprintf(stderr, "%s", message);
  }
}

/* DESCRIPTION:
 *   Passes a message, unchanged, to every noted connection that is a
 *   remote machine.  Local consoles are skipped.  If no connection at
 *   all is noted the message is written to stderr instead.
 *
 * INPUTS:
 *   message - text to pass on.  It is always printed as data, never
 *             interpreted as a printf format.
 *
 * OUTPUTS:
 *   none
 */
void nannyReportRemoteMessage(char *message)
{
  int i;
  connectInfoPtr c;

  DEBUG1("void nannyReportRemoteMessage(char *message)\n");
  if (!numConnects) {
    fprintf(stderr, "%s", message);
    return;
  }

  /* Count down because a failed send removes the entry from the list. */
  for (i=numConnects-1; i>=0; i--) {
    c = &NotedConnectsList[i];
    if (c->remote) {
      robustPassMessage(c->machine, message);
    }
  }
}

/* DESCRIPTION:
 *   Passes a message, unchanged, to every noted connection that is a
 *   local console.  Remote machines are skipped.  If no connection at
 *   all is noted the message is written to stderr instead.
 *
 * INPUTS:
 *   message - text to pass on.  It is always printed as data, never
 *             interpreted as a printf format.
 *
 * OUTPUTS:
 *   none
 */
void nannyReportLocalMessage(char *message)
{
  int i;
  connectInfoPtr c;

  DEBUG1("void nannyReportLocalMessage(char *message)\n");
  if (!numConnects) {
    fprintf(stderr, "%s", message);
    return;
  }

  /* Count down because a failed send removes the entry from the list. */
  for (i=numConnects-1; i>=0; i--) {
    c = &NotedConnectsList[i];

    if (!c->remote) {
      robustPassLocalReply(c->fd, message);
    }
  }
}

/* DESCRIPTION:
 *   Passes a message, unchanged, to every noted connection: remote
 *   machines through robustPassMessage() and local consoles through
 *   robustPassLocalReply().  If no connection is noted the message is
 *   written to stderr instead.
 *
 * INPUTS:
 *   message - text to pass on.  It is always printed as data, never
 *             interpreted as a printf format.
 *
 * OUTPUTS:
 *   none
 */
void nannyReportDualMessage(char *message)
{
  int i;
  connectInfoPtr c;

  DEBUG1("void nannyReportDualMessage(char *message)\n");

  if (!numConnects) {
    fprintf(stderr, "%s", message);
    return;
  }

  /* RTG- count down because items are removed from
   * the list if there is an error.
   */
  for (i=numConnects-1; i>=0; i--) {
    c = &NotedConnectsList[i];

    if (c->remote)
      robustPassMessage(c->machine, message);
    else
      robustPassLocalReply(c->fd, message);
  }
}

/* DESCRIPTION:
 *   Remembers a pid in a small table of pids already seen.
 *
 * INPUTS:
 *   foundpids - table of pids seen so far.
 *   nofound   - number of entries in use in foundpids; incremented when
 *               the pid is stored.
 *   pid       - pid to look for and, if new, to store.
 *
 * OUTPUTS:
 *   FALSE if pid is already in the table, TRUE otherwise.  A new pid is
 *   stored unless the table already holds 20 entries; in that case TRUE
 *   is still returned but nothing is recorded.
 */
BOOLEAN addpid(int *foundpids, int *nofound, int pid)
{
  int slot = 0;
  int used = *nofound;

  DEBUG1("BOOLEAN addpid(int *foundpids, int *nofound, int pid)\n");

  while (slot < used) {
    if (foundpids[slot] == pid) {
      return FALSE;
    }
    slot++;
  }

  if (used != 20) {
    foundpids[used] = pid;
    (*nofound)++;
  }
  return TRUE;
}

/* DESCRIPTION:
 *   Asks a yes/no question on stderr and reads the answer from stdin,
 *   repeating the question until the answer is usable.
 *
 * INPUTS:
 *   prompt - question text; the default answer is shown after it.
 *   defalt - answer to use for an empty line or end of input.  Anything
 *            other than TRUE or FALSE is treated as TRUE.
 *
 * OUTPUTS:
 *   TRUE if the answer starts with 'y' or 'Y', FALSE if it starts with
 *   'n' or 'N', and defalt for an empty line or end of input.
 */
static int getbool(char *prompt, int defalt)
{
  int valu;
  char input [100];

  DEBUG1("static int getbool(char *prompt, int defalt)\n");
  fflush (stdout);
  if (defalt != TRUE && defalt != FALSE)  defalt = TRUE;
  valu = 2;				/* meaningless value */
  do {
    fprintf (stderr,"%s  [%s]  ",prompt,(defalt ? "yes" : "no"));
    fflush (stderr);			/* in case it's buffered */
    /* fgets() is bounded by the buffer size, unlike gets(). */
    if (fgets (input, sizeof(input), stdin) == NULL) {
      valu = defalt;
    }
    else {
      char ch = *input;			/* first char */
      int k, sawNewline = 0;

      /* If the line did not fit, throw the rest of it away so that it
       * is not taken as the answer to the next question. */
      for (k = 0; input[k] != '\0'; k++)
        if (input[k] == '\n')
          sawNewline = 1;
      if (!sawNewline) {
        int c;
        do {
          c = getc (stdin);
        } while (c != EOF && c != '\n');
      }

      if (ch == 'y' || ch == 'Y')		valu = TRUE;
      else if (ch == 'n' || ch == 'N')	valu = FALSE;
      /* fgets() keeps the newline, so an empty line starts with '\n'. */
      else if (ch == '\n' || ch == '\0')	valu = defalt;
      else fprintf (stderr,"Must begin with 'y' (yes) or 'n' (no).\n");
    }
  }
  while (valu == 2);			/* until correct response */
  return (valu);
}

/* DESCRIPTION:
 * This runs through the possible resources, and checks to see that
 * none are running. If they are, and clean is set to TRUE, it
 * will _try_ to kill em off.
 *
 * For every program in rcAvailableProgramList the output of
 * "ps auwx | egrep $USER" is scanned, and each line whose command field
 * contains the program's executable is reported once per pid.
 *
 * We assume that this is called before nanny proper is run. (As in,
 * none of the processes have been started by nanny)
 *
 * INPUTS:
 *   clean  - TRUE to kill the processes found, FALSE to only report
 *            them together with a hint on how to kill them.
 *   prompt - when clean is TRUE, TRUE asks for confirmation (default
 *            answer "no") before each kill.
 *
 * OUTPUTS:
 *   none.  Returns early, with a warning, if USER is not set or is too
 *   long for the command buffer, and silently if popen() fails.
 */
void nannyCheckEmptySystem(BOOLEAN clean, BOOLEAN prompt)
{
  static const char psPrefix[] = "ps auwx | egrep ";
  int i;
  FILE *fd;
  rcProgramPtr r;
  char command[120];
  char *user;
  char *lptr, line[143]; /* this is what the -w option gives us */
  char name[10], pid[10], dummy[20];
  char program[143];     /* command field; can never exceed a whole line */
  int  foundpids[20], nofound=0;

  DEBUG1("void nannyCheckEmptySystem(BOOLEAN clean, BOOLEAN prompt)\n");

  /* The command is the same for every program, so build it once, and
   * make sure it fits (sizeof(psPrefix) accounts for the terminator). */
  user = getenv("USER");
  if (user == NULL || strlen(user) + sizeof(psPrefix) > sizeof(command)) {
    NWARNING1("USER is unset or too long, cannot check for running programs\n");
    return;
  }
  sprintf(command, "%s%s", psPrefix, user);

  for (i=0; i<rcNumAvailablePrograms; i++) {
    size_t width;

    r = &(rcAvailableProgramList[i]);

    if ((fd = popen(command, "r"))==NULL)
      return;

    /* The first line is read and discarded.
     * NOTE(review): presumably meant to skip the ps header, which the
     * egrep filter may already have removed - confirm. */
    psGetLine(line, 132, fd);

    while (!feof(fd) && psGetLine(line, 132, fd)) {
      lptr = psExtractKey(line, name, 8);
      lptr = psExtractKey(lptr, pid, 6);        /* PID */
      lptr = psExtractKey(lptr, dummy, 5);      /* %CPU */
      lptr = psExtractKey(lptr, dummy, 5);      /* %MEM */
      lptr = psExtractKey(lptr, dummy, 5);      /* %SS */
#if defined(linux)
      lptr = psExtractKey(lptr, dummy, 10);      /* RSS & TTY */
#else
      lptr = psExtractKey(lptr, dummy, 9);      /* RSS & TT */
#endif
      lptr = psExtractKey(lptr, dummy, 4);       /* STAT */
      lptr = psExtractKey(lptr, dummy, 13);      /* START & CPU */
      /* program running */
      /* Slight variablilty between Linux and Sunos */
      /* The width depends on the executable name, so this field gets a
       * line-sized buffer of its own instead of the 20-byte scratch one.
       * NOTE(review): assumes psExtractKey() stores at most the given
       * number of characters plus a terminator - confirm. */
      width = strlen(r->executable)+4;
      if (width > sizeof(program) - 1)
        width = sizeof(program) - 1;
      lptr = psExtractKey(lptr, program, width);

      /*
       * Note that we really only care about programs run from nanny,
       * and these should exactly match r->executable
       * Hmm, unix sometimes throws weird stuff in front, so back to
       * partial matching.
       */
      if (strstr(program,r->executable) != NULL) {
        int killPid = atoi(pid);

        if (addpid(foundpids,&nofound,killPid)) {
          NWARNING2("\"%s\" running", r->executable);
          NWARNING2(" with pid %s, ", pid);
          if (clean) {
            processInfoType p;
            BOOLEAN doKill=FALSE;

            if (!prompt) {
              doKill = TRUE;
            } else {
              doKill = getbool("Kill Process?", FALSE);
            }
            if (doKill) {
              int gone = 0;

              NWARNING1("Trying to kill...\n");

              /* Never signal pid 0 or a negative pid: that would hit a
               * whole process group, our own included. */
              if (killPid > 0) {
                int status;

                kill (killPid, 9);
                waitpid(killPid, &status, 0);
                /* Same test as nannyCatchChild: gone if ps cannot find
                 * it, or if ps finds it but it is not runnable. */
                gone = (!nannyPs(killPid, &p)) || (!psRunnablep(&p));
              }
              if (gone) {
                NWARNING1("Successful\n");
              } else {
                NWARNING1("Failed.\n");
              }
            }
          } else {
            NWARNING2("Do \"kill -9 %s\", or rerun with -clean.\n", pid);
          }
        }
      }
    }
    pclose(fd);
  }
}

/*****************************************************************************/
/* connection management */
/*****************************************************************************/

/* DESCRIPTION:
 *   Prints the noted connections, last noted first: a heading line, then
 *   one line per connection giving either the remote machine name or the
 *   descriptor of the local console.  The stream is flushed at the end.
 *
 * INPUTS:
 *   fd - stream to print to.
 *
 * OUTPUTS:
 *   none
 */
void nannyConnectList(FILE *fd)
{
  int idx;

  DEBUG1("void nannyConnectList(FILE *fd)\n");

  if (!numConnects)
    fprintf(fd, "\nNo connects yet established\n");
  else
    fprintf(fd, "\nAware of connections with...\n");

  idx = numConnects;
  while (idx-- > 0) {
    connectInfoPtr entry = &NotedConnectsList[idx];

    if (!entry->remote) {
      fprintf(fd,"\tLocal Console on: %d\n", entry->fd);
      continue;
    }
    fprintf(fd,"\tRemote Machine: %s\n", entry->machine);
  }

  fflush(fd);
}

/* DESCRIPTION:
 *   Adds a connection to NotedConnectsList unless nannyConnectNotedp()
 *   says it is already there.  A newly noted remote machine is
 *   immediately sent an announce message for every program currently
 *   marked as executing.
 *
 *   The new entry is counted in numConnects only after the announcements
 *   have been sent, so a send that fails at that point does not remove
 *   it again.
 *
 * INPUTS:
 *   remote - TRUE for a remote machine, FALSE for a local console.
 *   name   - machine name; stored only when remote is TRUE.
 *   fd     - console descriptor; stored only when remote is FALSE.
 *
 * OUTPUTS:
 *   none.  If the list cannot be grown a warning is issued and the
 *   existing list is left as it was.
 */
void nannyConnectNote(BOOLEAN remote, char *name, int fd)
{
  connectInfoPtr c, grown;

  DEBUG1("void nannyConnectNote(BOOLEAN remote, char *name, int fd)\n");
  if (nannyConnectNotedp(remote,name,fd))
    return;

  /* Grow into a temporary so the current list survives a failure. */
  if (numConnects == 0)
    grown = (connectInfoPtr) malloc(sizeof(connectInfoType));
  else
    grown = (connectInfoPtr)
      realloc(NotedConnectsList,
              (numConnects+1)* sizeof(connectInfoType));
  if (grown == NULL) {
    NWARNING1("nannyConnectNote: out of memory, connection not noted\n");
    return;
  }
  NotedConnectsList = grown;

  c = &NotedConnectsList[numConnects];

  c->remote = remote;
  if (remote) {
    /* NOTE(review): unbounded copy - assumes name always fits in
     * c->machine; confirm against the connectInfoType declaration. */
    strcpy(c->machine,name);
    {
      /*
       * We need to inform a machine as early as possible,
       * otherwise dependency problems will occur...
       */
      rcProgramPtr r;
      int id;
      char  ack[MAXBUF];

      for (id=0; id< rcNumAvailablePrograms; id++) {
        r = &(rcAvailableProgramList[id]);
        if (r->executing) {
          messageMakeAnnounce(ack, r->name, r->ready);
          robustPassMessage(c->machine,ack);
        }
      }
    }
  }
  else
    c->fd = fd;

  numConnects++;

#ifdef DEBUG
  nannyConnectList(stderr);
#endif
}

/* DESCRIPTION:
 *   Removes a connection from NotedConnectsList.  Nothing is done unless
 *   nannyConnectNotedp() reports the connection as noted.  The list is
 *   rebuilt in a fresh allocation without the entries that match:
 *   remote entries match when the machine name is exactly equal to name,
 *   local entries when the descriptor is equal to fd.
 *
 *   nannyConnectNotedp() accepts partial machine-name matches while the
 *   removal here needs an exact one, so it is possible that nothing is
 *   removed.  The new list therefore has room for every current entry,
 *   and numConnects is set to the number of entries actually kept.
 *
 *   The old list stays untouched until the copy is complete, because
 *   callers pass a name that points into it.
 *
 * INPUTS:
 *   remote - TRUE to remove a remote machine, FALSE for a local console.
 *   name   - machine name; compared only when remote is TRUE.
 *   fd     - console descriptor; compared only when remote is FALSE.
 *
 * OUTPUTS:
 *   none.  NotedConnectsList is NULL afterwards if no entries remain.
 *   If memory cannot be obtained a warning is issued and the list is
 *   left as it was.
 */
void nannyConnectUnnote(BOOLEAN remote, char *name, int fd)
{
  int i, kept;
  connectInfoPtr c, cnew, nList = NULL;

  DEBUG1("void nannyConnectUnnote(BOOLEAN remote, char *name, int fd)\n");
  if (!nannyConnectNotedp(remote,name,fd))
    return;

  /* numConnects is at least 1 here, since something was found. */
  nList = (connectInfoPtr) malloc(numConnects*sizeof(connectInfoType));
  if (nList == NULL) {
    NWARNING1("nannyConnectUnnote: out of memory, connection list unchanged\n");
    return;
  }

  for (i=0,kept=0; i<numConnects; i++) {
    c = &NotedConnectsList[i];

    /* Skip the entries being removed. */
    if ((c->remote && remote && (!strcmp(c->machine,name))) ||
        ((!c->remote && !remote && (fd == c->fd))))
      continue;

    cnew = &nList[kept];
    cnew->remote = c->remote;
    if (c->remote)
      strcpy(cnew->machine,c->machine);
    else
      cnew->fd = c->fd;
    kept++;
  }

  free(NotedConnectsList);
  if (kept == 0) {
    /* Do not keep an empty block around; nannyConnectNote starts a
     * fresh list when numConnects is 0. */
    free(nList);
    nList = NULL;
  }
  NotedConnectsList = nList;
  numConnects = kept;

#ifdef DEBUG
  nannyConnectList(stderr);
#endif
  return;
}

/* DESCRIPTION:
 *   Tells whether a connection is already in NotedConnectsList.
 *   A remote machine is considered noted when its name contains, or is
 *   contained in, the name of some noted remote entry.  A local console
 *   is considered noted when some noted local entry has the same
 *   descriptor.
 *
 * INPUTS:
 *   remote - TRUE to look for a remote machine, FALSE for a local console.
 *   name   - machine name; used only for remote entries.
 *   fd     - console descriptor; used only for local entries.
 *
 * OUTPUTS:
 *   TRUE if a matching entry exists, FALSE otherwise.
 */
BOOLEAN nannyConnectNotedp(BOOLEAN remote, char *name, int fd)
{
  int idx = 0;

  DEBUG1("BOOLEAN nannyConnectNotedp(BOOLEAN remote, char *name, int fd)\n");

  while (idx < numConnects) {
    connectInfoPtr entry = &NotedConnectsList[idx++];

    if (entry->remote && remote) {
      /* Either name may be the longer (e.g. qualified) form. */
      if (strstr(entry->machine,name) != NULL)
        return TRUE;
      if (strstr(name,entry->machine) != NULL)
        return TRUE;
      continue;
    }

    if (!entry->remote && !remote && fd == entry->fd)
      return TRUE;
  }

  return FALSE;
}



/*****************************************************************************
 * !!messagePassing - wrapper functions which check for closed connections 
 *****************************************************************************/

/* DESCRIPTION:
 *   Passes a message to a remote machine with passMessage().  If that
 *   reports failure, the machine is removed from the noted connections.
 *
 * INPUTS:
 *   machine - name of the remote machine.
 *   message - text to pass.
 *
 * OUTPUTS:
 *   none
 */
void robustPassMessage(char *machine, char *message)
{
  DEBUG1("void robustPassMessage(char *machine, char *message)\n");

  if (passMessage(machine,message)) {
    return;
  }

  /* The send failed: stop treating this machine as connected. */
  nannyConnectUnnote(TRUE, machine, -1);
}

/* DESCRIPTION:
 *   Passes a message to a local console with passLocalReply().  If that
 *   reports failure, the console is removed from the noted connections.
 *
 * INPUTS:
 *   id      - descriptor of the local console.
 *   message - text to pass.
 *
 * OUTPUTS:
 *   none
 */
void robustPassLocalReply(int id, char *message)
{
  DEBUG1("void robustPassLocalReply(int id, char *message)\n");

  if (passLocalReply(id,message)) {
    return;
  }

  /* The send failed: stop treating this console as connected.  The name
   * argument is only a placeholder for local entries. */
  nannyConnectUnnote(FALSE, "ERROR", id);
}
