
;;===========================================================================
;; Induction utility routines for NEITHER 
;;
;; -------------------------------------------------------------------------
;; AUTHORS: Paul T. Baffes.
;; Copyright (c) 1992 by AUTHORS. This program may be freely copied, used,
;; or modified provided that this copyright notice is included in each copy
;; of this code and parts thereof. 
;; -------------------------------------------------------------------------
;;
;; This file provides a few routines useful for making the correct calls to
;; an Induction tool (either ID3 or PFOIL) to generate new rules and
;; incorporate them into the theory. There is no main routine here; outside
;; code will probably need to make calls to the following: induce-rules,
;; cnvt-to-standard-ex, patch-decision-tree, and  merge-with-current-theory.
;; Note that the latter two calls deal with NEITHER decision tree formats
;; (not ID3 formats), but they are stored here because they are part of the
;; incorporation of new rules into an existing set of rules stored in
;; NEITHER.
;;
;; Finally, there are two routines from io.lisp, theory-to-decision-tree and
;; convert-theory, which translate a lisp version of rules into the NEITHER
;; internal format. Thus, the general idea is to use the first two routines
;; here (cnvt-to-standard-ex and induce-rules) to create a lisp version of
;; the new rules, then convert these to NEITHER, and finally merge them into
;; an existing theory (see add-pos-inductions routine in generalize.lisp).
;;
;; CHANGE HISTORY
;;
;; 11-JAN-93: (ptb) had to add a threshold argument to the induce-rules code
;;            so it could handle threshold rules.
;; 23-FEB-93: (ptb) took some of the routines out of here and put them into
;;            a new file called "id3-glue.lisp" so that I could use ID3 or a
;;            different induction mechanism called PFOIL. PFOIL has its own
;;            file called "pfoil-glue.lisp".
;; 08-APR-93: (ptb) Added a reference to *neither-examples* for printout 
;;            purposes when debugging. Now, the "cnvt-to-standard-ex" routine
;;            can tell you which examples it is passing off to induction. 
;;            Note that there should NOT be any example which is both a 
;;            positive and negative passed to induction!!
;;===========================================================================

(in-package #+:cltl2 "CL-USER" #-:cltl2 "USER")

;;===========================================================================
;; GLOBAL VARIABLE REFERENCE
;;
;; Merging two theories together requires that we don't try merging SUB parts
;; of a theory more than once. To avoid this circular-list-creating bug, I
;; make use of the *deduction-marker* global variable. It's a bit of a
;; misnomer since this code is not doing any deduction, but the principle is
;; the same, i.e., that of not traversing parts of a theory more than once.
;; So, I import the same global and use it the same way deduce.lisp does. I
;; also have to import the *neither-theory* global so I can destructively
;; modify the current theory when merging.
;;
;; Finally, I need a reference to the *neither-examples* global for a print
;; out statement in cnvt-to-standard-ex. Note that I also need access to the
;; *trace-genl* and *trace-spec* globals from debug.lisp to tell the routine
;; when to print out.
;;===========================================================================
#+:cltl2(declaim (special *deduction-marker*)
                 (special *trace-genl*)
                 (special *trace-spec*)
                 (special *neither-theory*)
                 (special *neither-examples*))
#-:cltl2(progn
          (proclaim '(special *deduction-marker*))    ;; in deduce.lisp
          (proclaim '(special *trace-genl*))          ;; in debug.lisp
          (proclaim '(special *trace-spec*))          ;; in debug.lisp
          (proclaim '(special *neither-theory*))      ;; in io.lisp
          (proclaim '(special *neither-examples*)))   ;; in io.lisp


(defun cnvt-to-standard-ex (positives negatives)
  "Convert examples from NEITHER's format to a standard format."
  ;;-------------------------------------------------------------------------
  ;; Converts the positive and negative examples stored in NEITHER format to
  ;; the standard format used by Ray's induction code. This works both for
  ;; ID3 and PFOIL. The result is *ONE* list of examples.
  ;;
  ;; Works by nconc'ing together the list of converted positives with the
  ;; list of converted negatives. Since the negative examples may have many
  ;; different category names (for multi-category theories) each is
  ;; hard-coded to have the *negative-category* as its classification. All
  ;; positives are hard-coded to have "positive" as the category. That way,
  ;; the rules learned will simply separate the positives from the negatives.
  ;;
  ;; 08-APR-93 (ptb): added a big debugging statement to print out which 
  ;; example numbers were being passed to induction. This is extraordinarily
  ;; helpful when trying to find out why NEITHER is not consistent with a set
  ;; of training examples.
  ;;-------------------------------------------------------------------------
  (when (or *trace-genl* *trace-spec*)
    (format t "~%~%Induction being called with the following examples:")
    (loop for ex in positives 
          with still-search = t
          while still-search
          do
          (when (member ex negatives :test #'eq)
            (format t "~%~%*** WARNING: POTENTIAL NEITHER ERROR ***~%~%")
            (setf still-search nil)))
    (format t "~%  positives =")
    (loop for ex in positives do 
          (format t " ~D" (position ex *neither-examples*)))
    (format t "~%  negatives =")
    (loop for ex in negatives do 
          (format t " ~D" (position ex *neither-examples*))))
  (nconc
   (loop for i in positives
	 collect (list 'positive (example-values i)))
   (loop for i in negatives
	 collect (list 'negative (example-values i)))))


(defun induce-rules (examples threshold split-rule &optional (base-antes nil)
			      (desired-category 'positive))
  "Builds rules using induction. Returns only those rules that classify
examples into desired-category."
  ;;-------------------------------------------------------------------------
  ;; Builds rules using induction and the examples passed in. These come back
  ;; in a format dependent upon the type of induction called, and are then
  ;; translated into a rule format recognized as input to NEITHER. However,
  ;; only those rules whose consequent matches "desired-category" are kept.
  ;; This is because we are only interested in those rules that capture the
  ;; positive examples stored in "examples". So, both positive and negative
  ;; examples are passed in "examples", induction descriminates them, and we
  ;; keep the rules that classify the positives.
  ;;
  ;; update 9-9-92: by passing in "base-antes" which is then passed to the
  ;; translation routines, we can force all output rules to have some set of
  ;; antecedents. This is useful for creating rules that effectively add
  ;; antecedents to previous rules.
  ;;
  ;; 11-JAN-93 (ptb): added a threshold argument and passed it on to the 
  ;; translation routine .
  ;;
  ;; 14-JAN-93 (ptb): added "split-rule" argument to tell this routine to 
  ;; indicate whether or not to split the original rule into two parts. This
  ;; is necessary to handle m-of-n induction. In special cases, induction can
  ;; just add what it finds to the "base-antes" stuff passed in when creating
  ;; a new rule. This is how induction worked for the non m-of-n case.
  ;; However, in general, induction must create a new rule with an arbitrary
  ;; consequent, and then add this consequent to "base-antes". That is,
  ;; the general solution for induction is:
  ;;
  ;;    1.  make new rule in the form gensym <- base-antes
  ;;    2.  set base-antes = (gensym)
  ;;    3.  call transformation.
  ;;
  ;; (Note that the format of "base-antes" is a list of antecedents that are
  ;; in the same format in which antecedents appear in data files, e.g.
  ;; something like "(color red)"). There are two conditions under which the
  ;; above is not done. If the value of "split-rule" is nil, or if base-antes
  ;; is nil. In the former case, the calling routine has detected a special
  ;; event that obviates the need for splitting, and in the latter case the
  ;; rule from step 1 would be trivially provable (and can thus be avoided). 
  ;;
  ;; See also the documentation for "add-pos-inductions" (generalize.lisp) 
  ;; and "add-neg-inductions" (specialize.lisp) and the files for ID3
  ;; induction (id3-glue.lisp) and PFOIL induction (pfoil-glue.lisp).
  ;;
  ;; 23-JAN-93 (ptb): currently set to work with PFOIL.
  ;;-------------------------------------------------------------------------
  (if (and split-rule base-antes)
      (let ((new-conseq (gentemp "NEW-")))
        `((,threshold ,new-conseq <- ,@base-antes)
          ,@(pfoil-induce examples desired-category 0 `(,new-conseq))))
      (pfoil-induce examples desired-category threshold base-antes)))


(defun patch-decision-tree (rule-name tree)
  ;;-------------------------------------------------------------------------
  ;; Takes an existing decision tree, in NEITHER's internal format, and
  ;; updates the prop field of each top level vertex structure to be
  ;; "rule-name". Also, make sure to update the consequent field of all the
  ;; children of these vertices to be "rule-name".
  ;;
  ;; Essentially, all this routine does is change the consequent from the
  ;; word "positive" to whatever consequent gets passed in via rule-name. The
  ;; induction separates the positive examples from the negative ones because
  ;; all the positives are labeled "positive". The translated rules that get
  ;; passed in via the "tree" argument all still have "positive" as their
  ;; consequent.
  ;;-------------------------------------------------------------------------
  (loop for vert in tree
	if (eq (vertex-prop vert) 'positive)
	do (setf (vertex-prop vert) rule-name)
           (loop for child in (vertex-children vert)
                 do (setf (rule-consequent child) rule-name))
	finally (return tree)))


(defun merge-with-current-theory (newtheory)
  "Inserts the element of newtheory into the existing theory stored in the
*neither-theory* global. *neither-theory* is destructively modified."
  ;;-------------------------------------------------------------------------
  ;; Once induction outputs new rules, and those rules have been converted to
  ;; NEITHER's internal format, they must be merged with the existing domain
  ;; theory stored in *neither-theory*. This is done by looping through the
  ;; elements of the new theory and searching for the corresponding vertices
  ;; in *neither-theory* (implicit in the "find-vertex" call). If found, then
  ;; the children of the new theory vertex are integrated with the old
  ;; theory. If not found, the new theory vertex is tacked on to
  ;; *neither-theory*. All the work is done recursively through calls to
  ;; "merge-vertex" and "merge-children".
  ;;-------------------------------------------------------------------------
  (incf *deduction-marker*)
  (loop for vert in newtheory
	for oldvert = (find-vertex (vertex-prop vert))
        do (merge-vertex vert oldvert)
           (unless oldvert (push vert *neither-theory*))
	finally
	(return *neither-theory*)))


(defun merge-vertex (vert oldvert)
  ;;-------------------------------------------------------------------------
  ;; First, this routine checks to see if the new vertex in "vert" has any 
  ;; children. If not, nothing needs to be done with it here (the caller will
  ;; take care of keeping or discarding "vert"). If there are children, then
  ;; this routine recursively calls "merge-children". When that returns, the
  ;; children are nconced on to the children of oldvert, if oldvert exists.
  ;;-------------------------------------------------------------------------
  (if (= *deduction-marker* (vertex-Dvisited vert))
      (return-from merge-vertex))
  (setf (vertex-Dvisited vert) *deduction-marker*)
  (let ((children (vertex-children vert)))
    (when children
      (merge-children children)
      (when oldvert
        (setf (vertex-children oldvert)
              (append (vertex-children oldvert) children))
        (setf (vertex-no-rules? oldvert) nil)))))


(defun merge-children (children)
  ;;-------------------------------------------------------------------------
  ;; Given a list of children, this routine loops through the children, 
  ;; linking them in to the current theory. There are two checks made here.
  ;; If the antecedent in question points to a vertex which has children, 
  ;; then a call is made recursively to "merge-vertex". Regardless of how 
  ;; that call turns out, this routine must make a second check to see if
  ;; the antecedent should point to its current vertex or to some other 
  ;; vertex in the old theory. If an oldvert exists, then the antecedent is
  ;; updated to point there, otherwise it is left alone (note that since the
  ;; recursive call to merge-vertex would have moved any children to the 
  ;; oldvert, we don't lose any references to new children by changing the
  ;; antecedent to point to oldvert).
  ;;-------------------------------------------------------------------------
  (loop for child in children
	do
	(loop for ante in (rule-antecedents child)
	      for prop = (antecedent-prop ante)
	      for oldvert = (find-vertex (if (listp prop) (car prop) prop))
	      do (when (vertex-children (antecedent-vertex ante))
                   (merge-vertex (antecedent-vertex ante) oldvert))
                 (when oldvert
                   (setf (antecedent-vertex ante) oldvert)))))
