
;;===========================================================================
;; ID3 utility routines for NEITHER 
;;
;; -------------------------------------------------------------------------
;; AUTHORS: Paul T. Baffes.
;; Copyright (c) 1992 by AUTHORS. This program may be freely copied, used,
;; or modified provided that this copyright notice is included in each copy
;; of this code and parts thereof. 
;; -------------------------------------------------------------------------
;;
;; This file provides a few routines useful for making the correct calls to
;; ID3 to generate new rules and incorporate them into the theory. Most of
;; the tough work for induction is carried out by the routines in
;; "induce.lisp"; this code is only the stuff that is peculiar to ID3.
;;
;; CHANGE HISTORY
;;
;; 8-31-92: (ptb) Fixed bug in the "dt2th" (ie, decision tree to theory)
;;          routine (Chris ignored all "false" nomimal values). Added
;;          comments to code.
;; 9-11-92: (ptb) Changed the dt2th routine to prune rules during
;;          translation. 
;; 11-JAN-93: (ptb) had to add a threshold argument to the dt2th so it could
;;            handle threshold rules. 
;; 23-FEB-93: (ptb) took most of the routines out of here and put them into
;;            a new file called "induce.lisp" so that I could use ID3 or a
;;            different induction mechanism called PFOIL.
;;===========================================================================

(in-package #+:cltl2 "CL-USER" #-:cltl2 "USER")


(defun id3-induce (examples desired-category threshold base-antes)
  ;;-------------------------------------------------------------------------
  ;; A wrapper routine to make the correct internal calls here to do an ID3
  ;; type induction.
  ;;-------------------------------------------------------------------------
  (dt2th (train-id3 examples) desired-category threshold base-antes))


(defun dt2th (dt desired-category threshold &optional branch-vals)
  "Converts a decision tree (dt) to a theory (th). Keeps only those rules
whose consequent matches desired-category. The optional branch-vals argument
is used by the recursive steps."
  ;;-------------------------------------------------------------------------
  ;; The recursion here is based upon the structure of a decision tree. The
  ;; idea is to branch down the decision tree, until you hit a leaf node.
  ;; Leaves are signified by symbols rather than structures; that is, the
  ;; decision tree is a symbol.
  ;;
  ;; Here's the structure of a decision tree as outlined in Ray's ID3 code. A
  ;; decision tree looks like:
  ;;
  ;;    (decision-tree
  ;;       feature           ;; an integer representing the feature
  ;;       threshold         ;; for linear features (not used here)
  ;;       branches)         ;; a list of branch structures
  ;;
  ;; and a branch structure looks like:
  ;;
  ;;    (branch
  ;;       value             ;; value of the (parent) feature for this branch
  ;;       ratio             ;; fraction of examples classified down branch
  ;;       subtree)          ;; a symbol (category) or decision tree
  ;;
  ;; Thus, a decision tree starts with a feature, with a branch for each of
  ;; the values of the feature that are classified by the tree. The feature
  ;; tells you where to look in the example, and the value of that feature in
  ;; the example tells you which branch to take.
  ;;
  ;; If the subtree of a branch is a symbol, the example is categorized as
  ;; that symbol. If it's a decision tree, the recursive descent is repeated
  ;; until a leaf node (symbol) is found to categorize the example.
  ;;
  ;; In this routine, then, the method for pulling rules out of the decision
  ;; tree follows this recursive descent. As we descend the tree, depth
  ;; first left-to-right, we keep track of each feature-branch pair in the
  ;; "theory" variable. When we hit a leaf, we terminate recursion and output
  ;; a rule of the form (symbol <- elements-in-branch-vals) if that leaf
  ;; matches desired-category.
  ;;
  ;; CHANGE 9-11-92(ptb): Well, I made a change to the recursion to optimize
  ;; the rules coming out of this routine. In the event that a true-false
  ;; decision node has a positive classification down its T branch, we know
  ;; that the negation of that need not be recursively passed to the F
  ;; branch. For example:
  ;;
  ;;               A            Output rules:
  ;;             t/ \f          ------------
  ;;             +  rest          + <- A
  ;;                              + <- ^A & rest
  ;;
  ;; The above tree normally produces the two rules shown. Using DeMorgan's
  ;; laws yields the following:
  ;;
  ;;   + <- A V (^A & rest)
  ;;   + <- (A V ^A) & (A V rest)
  ;;   + <- T & (A V rest)
  ;;   + <- A V rest
  ;;
  ;; which leads to the two following rules: + <- A and + <- rest, indicating
  ;; that the ^A is not required in the second rule.
  ;;
  ;; 11-JAN-93 (ptb): Added another argument to the parameter list for this 
  ;; routine to pass in the old rule's threshold. This is key for dealing 
  ;; thresholded rules since generalization will delete some antecedents and
  ;; add new ones to the old rule. One must have the correct threshold from
  ;; the old rule or the resulting new rule may be too restrictive.
  ;;-------------------------------------------------------------------------
  (if (symbolp dt)
      (if (eq dt desired-category) 
          `((,threshold ,dt <- ,@branch-vals)))
      (let ((f (feature-name (decision-tree-feature dt)))
            (br (decision-tree-branches dt))
            (add-false-case t))
        (loop for b in br
              append
              (dt2th (branch-subtree b)
                     desired-category
                     threshold
                     (case (branch-value b)
                       (false (if add-false-case
                                  `((not ,f) ,@branch-vals)
                                  branch-vals))
                       (true (if (eq (branch-subtree b) desired-category)
                                 (setf add-false-case nil))
                             `(,f ,@branch-vals))
                       (t `((,f ,(branch-value b)) ,@branch-vals))))))))
