;;; FILE:        FOIL.L
;;; PROGRAMMER:  John M. Zelle
;;; DATE:        2/1/92
;;; DESCRIPTION: A LISP implementation of the FOIL (First Order Inductive
;;;    Learning) algorithm for learning relational concept definitions.
;;;    For backgound on the algorithm, see :
;;;
;;;    Quinlan, J. R., "Learning Logical Definitions from Relations,"
;;;    in Machine Learning, 5, 1990.  
;;;    
;;;    The version presented here is somewhat simplified in that it
;;;    uses a much weaker test to constrain recursive predicates
;;;    (see the function, RECURSION-FILTER), and it does not incorporate
;;;    encoding length restrictions to handle noisy data.  There is also 
;;;    no post-processing of clauses to simplify learned definitions, 
;;;    although this would be relatively easy to add.

;;; MODIFIED: (2/6/92 JMZ) Cleaned up variable names and added parameter
;;;    *use-negated-literals*.

(proclaim '(optimize (speed 3)))

;;; REPRESENTATION OF HYPOTHESES
;;;
;;; FOIL hypotheses are represented as a set of function-free Horn
;;; clauses.  Since the heads of the clauses of a definition are all
;;; identical, they are not explicitly represented in the working
;;; hypothesis.  Instead, the program maintains a list of clause bodies
;;; representing the current concept definition.

;;; Each clause body is a list of antcedents.  An empty body corresponds
;;; to the single antecedent, TRUE.  An antecedent is a list consisitng
;;; of the predicate name of the antecedent followed by its variables.
;;; A variable is represented by a natural number.  Thus, the body
;;; a clause such as:
;;;
;;;     list(A) :- components(A,B,C), list(C).
;;;
;;; would be represented as:
;;;
;;;     ( (components 0 1 2) (list 2) )
;;;
;;; Background knowledge is represented as a property list of
;;; predicates with associated sets of tuples which are in 
;;; the relation.  Examples:

(setf list-preds
      '( list ( (()) ((a)) ((b (a) d)) (((a) d)) ((d)) )
	 null ( (()) )
	 components ( ((a) a ()) ((b (a) d) b ((a) d))
		      (((a) d) (a) (d)) ((d) d ()) ((e.f) e f))))
  
(setf can-reach-preds
      '( can-reach ( (0 1) (0 2) (0 3) (0 4) (0 5) (0 6) (0 8)
    	             (1 2) (3 2) (3 4) (3 5) (3 6) (3 8) (4 5) 
	             (4 6) (4 8) (6 8) (7 6) (7 8) )
	 linked-to ( (0 1) (0 3) (1 2) (3 2) (3 4)
		     (4 5) (4 6) (6 8) (7 6) (7 8) )))

;;; FOIL returns a list of clause bodies  defining the predicate,
;;; NAME, in terms of the background knowledge in PREDICATES.
;;; The resulting definition covers all of the positive examples
;;; and none of the negative.  If negative examples are not
;;; provided, a closed-world assumption is used.
;;; PREDICATES is a property list where each key is the
;;; name of a relation, and the data is a list of lists where
;;; each list represents a "tuple" which is in the relation. NOTE:
;;; The set of positive tuples for the predicate to be learned must
;;; be part of PREDICATES.  Try: (foil 'list list-preds)

(defparameter *use-negative-literals* nil)

(defun foil (target-concept extensional-defs &optional negatives)
 (let* ((positives (getf extensional-defs target-concept))
	(universe (create-universe extensional-defs))
	(background-preds (mapcan #'(lambda (x) (if (atom x) (list x)))
				  extensional-defs))
	(background-tuples (mapcan #'(lambda(x) (if (consp x) (list x)))
				   extensional-defs))
	(positives-to-cover positives)
	positive-tuples	negative-tuples current-clause 
	clauses info-value best-literal)
   (unless negatives
     (setf negatives (close-world positives universe)))
   (format t "~%SETUP COMPLETE -- STARTING INDUCTION~%")
   
   ;;; The top-level loop continues until all positive examples have
   ;;; been covered by some clause in the hypothesis.
   (loop (if (null positives-to-cover) (return clauses))
      (setf current-clause nil)
      (setf positive-tuples positives-to-cover)
      (setf negative-tuples negatives)
      
      ;;; Add antecedents to the current clause until it covers no
      ;;; negative examples.
      (loop (if (null negative-tuples) (return))
	 (format t "~%CURRENT CLAUSE: ~A~%STILL COVERS NEGATIVE~%"
		 current-clause)

	 ;;; Find the literal whose addition produces most info gain
	 (setf info-value
	       (compute-info-value (length positive-tuples)
				   (length negative-tuples)))
	 (setf best-literal 
	       (find-best-literal target-concept background-preds background-tuples
				  positive-tuples negative-tuples info-value))
	 (push best-literal current-clause)
	 (format t "~%~%BEST ANTECENDENCT FOUND: ~a" best-literal)

	 ;;; Modify the psoitive and negative tuples to reflect the
	 ;;; newly extended current-clause.
	 (setf positive-tuples 
	       (extend-tuples-with-literal positive-tuples
					   extensional-defs
					   best-literal))
	 (setf negative-tuples
	       (extend-tuples-with-literal negative-tuples
					   extensional-defs
					   best-literal)))
      ;;; Add the new clause to the hypothesis, and calculate the positive
      ;;; examples yet to be covered.
      (push (nreverse current-clause) clauses)      
      (format t "~%~%>>>>>FOUND CLAUSE: ~a" (car clauses))
      (setf positives-to-cover 
	    (set-difference positives-to-cover
			    positive-tuples
			    :test #'subtuplep)))))

;;;---------------------------------------------------------------------------
;;; These functions implement a "closed-world" convention for generating
;;; negative examples.
;;;---------------------------------------------------------------------------

;;; return a list containing all terms that appear in a list of tuples
(defun collect-terms-from-tuples (tuple-list)
 (delete-duplicates 
  (mapcan #'(lambda (x) x) (copy-tree tuple-list))
  :test #'equal))

;;; return a list of all terms that appear in tuples of the extensional
;;; definitions, EXT-DEFS
(defun create-universe (ext-defs)
  (delete-duplicates 
     (mapcan #'(lambda (X)
		 (if (consp x)
		     (collect-terms-from-tuples x)))
	     ext-defs)
     :test #'equal))

;;; return all the tuples of LENGTH arguments which can be constructed
;;; using terms from the set CONSTANTS.
(defun all-tuples (constants length)
  (if (zerop length)
      (list nil)
      (let ((one-less-list (all-tuples constants (1- length))))
	(mapcan #'(lambda (short-tuple)
		    (mapcar #'(lambda (c) (cons c short-tuple))
			    constants))
		one-less-list))))

;;; returns a list of all tuples constructable from terms in UNIVERSE
;;; which are not in TUPLES.
(defun close-world (tuples universe)
  (set-difference (all-tuples universe (length (car tuples)))
		  tuples
		  :test #'equal))

;;;---------------------------------------------------------------------------
;;; Functions for finding a useful antecedent.  These are the real heart
;;; of the hill-climbing search used by FOIL.
;;;---------------------------------------------------------------------------

;;; FIND-BEST-LITERAL searches the space of all "reasonable" literals which
;;; can be constructed from the background predicates to find the one which
;;; yields maximum information gain.  A literal is "reasonable" if it shares
;;; at least one variable with the tuples constructed so far, and does not
;;; lead to infinite or vacuous recursion.  TARGET-CONCEPT is the
;;; name of the concepte whose definition is being learned.  It must be known
;;; to apply special restrictions on recursive literals.  ANTE-NAMES
;;; is a list of the names of the background predicates.  EXT-DEFS
;;; is a corresponding list of extensional definitions (tuple lists).
;;; POSITIVE-TUPLES is a list of tuples which are extensions of positive
;;; examples which are covered by the current clause.  That is, each tuple
;;; is a list of ground substitutions for the variables in the current
;;; clause.  The order of the tuple is crucial, position 0 represents the
;;; value of the first variable, position 1, the second, etc.  The number
;;; of "old" variables in the current clause so far is therefore the length
;;; of each tuple.  NEGATIVE-TUPLES is the list of extensions of negative
;;; examples covered by the current clause.  INFO-VALUE is the current
;;; information measure for the current tuples.
;;; The function returns two values, the best antecedent, and the gain.
;;; NOTE:  This code is somewhat ugly, but relatively efficient.

(defun find-best-literal (target-concept ante-names ext-defs
				  positive-tuples negative-tuples info-value)
  (let ((max-gain 0)
	(old-var-count (length (first positive-tuples)))
	best-literal info-gain potential-gain-by-specializing)

    ;;; Try all possible unnegated literals
    (do* ((definitions ext-defs (cdr definitions))
	  (ext-def (car definitions) (car definitions))
	  (name-list ante-names (cdr name-list))
	  (ante-name (car name-list) (car name-list))
	  (arity (length (car ext-def)) (length (car ext-def)))
	  (all-bindings (generate-variablizations old-var-count arity)
			(generate-variablizations old-var-count arity))
	  (prune-list nil))
      ((null definitions))

      (format t "~% TRYING PREDICATE: ~a..." ante-name)
      (if (eq ante-name target-concept)
	  (setf all-bindings (recursion-filter  old-var-count 
						all-bindings)))
      (setf prune-list nil) ;;; Tuples whose specializations can't win.
      (dolist (binding all-bindings)
	;;; Test if this binding is known to be futile
	(unless  (member binding prune-list 
			:test #'(lambda (x y) 
				  (same-old-vars x y old-var-count)))
	  (multiple-value-setq (info-gain potential-gain-by-specializing)
	    (compute-info-gain info-value positive-tuples negative-tuples 
			       ext-def binding))
	  (when (> info-gain max-gain)
	    (setf max-gain info-gain)
	    (setf best-literal (cons ante-name binding))
	    (format t "~%     BEST SO FAR: ~a  ~a" best-literal max-gain))
	  (if (< potential-gain-by-specializing max-gain)
	      (push binding prune-list)))))

    ;;; This loop is just the same, for negated literals
    (when *use-negative-literals*
      (do* ((definitions ext-defs (cdr definitions))
	    (ext-def (car definitions) (car definitions))
	    (name-list ante-names (cdr name-list))
	    (ante-name (car name-list) (car name-list))
	    (arity (length (car ext-def)) (length (car ext-def)))
	    (all-bindings (generate-variablizations old-var-count arity)
			  (generate-variablizations old-var-count arity))
;	    (prune-list nil)
	    )
	((null definitions))
	
	(format t "~% TRYING PREDICATE: NOT ~a..." ante-name)
	(if (eq ante-name target-concept)
	    (setf all-bindings (recursion-filter  old-var-count 
						  all-bindings)))
;	(setf prune-list nil)
	(dolist (binding all-bindings)
;	  (unless (member binding prune-list 
;			  :test #'(lambda (x y)
;				    (same-old-vars x y old-var-count)))
	    (multiple-value-setq (info-gain potential-gain-by-specializing)
	       (compute-negative-info-gain 
		  info-value positive-tuples negative-tuples ext-def binding))
	    (when (> info-gain max-gain)
	      (setf max-gain info-gain)
	      (setf best-literal (list 'not (cons ante-name binding)))
	      (format t "~%     BEST SO FAR: ~a  ~a" best-literal max-gain))
;	    (if (< potential-gain-by-specializing max-gain)
;		(push binding prune-list)))
	    )))

    (values best-literal max-gain)))


;;; GENERATE-VARIABLIZATIONS is a somewhat subtle function which
;;; returns a list of possible bindings for a new antecedent of arity, SIZE
;;; given that the tuples currently have OLD-VAR-COUNT variables.  Each
;;; binding is a list of numbers corresponding to a way in which vars
;;; may be assigned in the new antecedent.  Recall, numbers less than
;;; old-var-count represent existing variables, and numbers equal or
;;; greater are new (unbound) vars.
;;; NOTE:  The order in which bindings are returned is crucial to the
;;; pruning of the search space.  Tuples having identical bindings to
;;; old variables are returned with the most general binding earliest
;;; in the list.
(defun generate-variablizations (old-var-count size)
  (let (count possible-vars result)
    ;;; First generate a list containing the numbers corresponding to
    ;;; the existing variables and -1 for a new variable "marker"
    (dotimes (i (1+ old-var-count) t)
      (push (- old-var-count i 1) possible-vars))

    ;;; Now generate all possible variations of old variables with 
    ;;; -1 marking places for new variables.  NOTE the "cdr" serves
    ;;; to remove the binding having all new variables.
    (setf result (cdr (all-tuples possible-vars size)))

    ;;; Replace the new variable markers with independent new
    ;;; variable numbers.  Each binding which introduces new
    ;;; variables is now the most general case in that each
    ;;; new variable is independent.
    (dolist (v-ation result result)
      (setf count old-var-count)
      (do ((ptr v-ation (cdr ptr)))
        ((null ptr) result)
	(when (equal (car ptr) -1)
	  (setf (car ptr) count)
	  (incf count))))

    ;;; Finally, expand each binding containing new variables into
    ;;; into a set of bindings representing all possible dependencies
    ;;; among the new var positions.

    (mapcan #'(lambda (binding) (expand-on-new-vars binding old-var-count))
	    result)))

;;; EXPAND-ON-NEW-VARS
;;; Given a binding (a list of naturals) and the number of the "first"
;;; new variable, this function returns the list of all bindings which
;;; can be formed by specialization which unifies some of the new vars.
(defun expand-on-new-vars (binding new-var)
  (let* ((new-var-list (mapcan #'(lambda (x)
				   (if (>= x new-var)
				       (list x)))
			       binding))
	 (partitions (all-partitions new-var-list)))
    (if new-var-list
	(mapcar 
	   #'(lambda (partition)
	       (do* ((new-binding (copy-list binding))
		     (ptr new-binding  (cdr ptr)))
		 ((null ptr) new-binding)
		 (unless (< (car ptr) new-var)
		   (setf (car ptr) 
			 (find-var-number (car ptr) partition new-var)))))
	   partitions)
	(list binding))))

;;; The next three functions, EXTEND-PARTITION, ALL-PARTITIONS, and
;;; FIND-VAR-NUMBER are used to calculate all the possible paritionings
;;; among new variables.

;;; A partition is a list of lists which represent equivalence classes.

;;; Given a parition and a new element, generate a list of all the new
;;; partitions that could be constructed by making the element a singleton
;;; subset, or adding it to one of the existing equivalence classes.
(defun extend-partition (value partition)
  (cons (cons (list value) partition)
	(mapcar #'(lambda (subset)
		    (substitute (cons value subset) subset partition
				:test #'equal :count 1))
		partition)))

;;; Given a list of numbers return a list of all possible paritionings
;;; of the list into equivalence classes.
(defun all-partitions (set)
  (if (null (cdr set))
      (list (list set))
      (mapcan #'(lambda (partition)
		  (extend-partition (car set) partition))
	      (all-partitions (cdr set)))))

;;; Find the class in which var occurs in partition, and return a 
;;; new variable name based on start-count representing all of
;;; the variables falling in the first class, start-count + 1 the
;;; second, etc.
(defun find-var-number (var partition start-count)
  (dolist (subset partition)
    (if (member var subset :test #'eql)
	(return start-count)
	(incf start-count))))
		 

;;;---------------------------------------------------------------------------
;;; OPERATIONS ON TUPLES
;;;---------------------------------------------------------------------------

;;; if EXTUPLE can extend TUPLE under the constraints of binding, return
;;; the extension, o/w NIL.  e.g. (a b c) (1 3) (b f) returns (a b c f)
;;; since the 1th position of (a b c) contains "b" and the tuples are
;;; therefore compatible.  Binding forces corresponding positions
;;; in exttuple to match tuple.
(defun extend-tuple-with-tuple (tuple binding exttuple)
  (do ((var-positions binding (cdr var-positions))
       (values exttuple (cdr values))
       (first-new-var (length tuple))
       (new-binding))
      ( (null var-positions) (append tuple (reverse new-binding)) )
    (if (< (car var-positions) first-new-var)
	(unless (equal (nth (car var-positions) tuple) (car values))
	  (return nil))
	(push (car values) new-binding))))

;;; return a list of all possible extensions of tuple with the tuples of
;;; EXT-DEFINITION under the constraints imposed by BINDING.
(defun all-extensions (tuple binding ext-definition)
 (delete-duplicates
  (mapcan #'(lambda (x) 
	      (let ((extension (extend-tuple-with-tuple tuple binding x)))
		(if extension (list extension))))
	  ext-definition)))

;;; COMPUTE-INFO-VALUE returns the information content of a positive
;;; signal for a set containing PLUS-COUNT positives and NEG-COUNT 
;;; negatives.  The goofy constant is the binary log conversion.
(defun compute-info-value (plus-count neg-count)
  (- (*  (log (/ plus-count (+ plus-count neg-count))) 1.4426950408889634)))


;;; COMPUTE-INFO-GAIN
;;; returns two values, the information gained by using this binding of
;;; the relation defined by ext-definition, and the maximum gain
;;; potentially achievable by specializations of this binding.
(defun compute-info-gain (current-info-value
			  pos-tuples
			  neg-tuples
			  ext-definition
			  binding)
  (let (new-pos-count new-neg-count pos-retained)
    (multiple-value-setq (new-pos-count pos-retained)
      (count-extensions pos-tuples ext-definition binding))
    (if (zerop pos-retained) (return-from compute-info-gain (values 0 0)))
    (setq new-neg-count 
      (count-extensions neg-tuples ext-definition binding))
    (values
        (* pos-retained 
	   (- current-info-value 
	      (compute-info-value new-pos-count new-neg-count)))
	(* pos-retained current-info-value))))

;;; COMPUTE-NEGATIVE-INFO-GAIN (same as above, but for negated antecedent)
(defun compute-negative-info-gain (current-info-value
				   pos-tuples
				   neg-tuples
				   ext-definition
				   binding)
  (let (new-pos-count new-neg-count)
    (setf new-pos-count 
	  (count-unextendable pos-tuples ext-definition binding))
    (if (zerop new-pos-count) 
	(return-from compute-negative-info-gain (values 0 0)))
    (setf new-neg-count 
	  (count-unextendable neg-tuples ext-definition binding))
    (values
       (* new-pos-count
	(- current-info-value 
	   (compute-info-value new-pos-count new-neg-count)))
       (* current-info-value new-pos-count))))

;;; COUNT-EXTENSIONS returns two values, the number of tuples
;;; in the set which results from expanding each tuple in TUPLES
;;; with all compatible tuples from EXT-DEFINITION under the restriction
;;; imposed by binding, and the number of tuples which have any extension.
(defun count-extensions (tuples ext-definition binding)
  (let ((sum 0)
	(retained 0)
	temp-count)
    (dolist (tuple tuples (values sum retained))
      (setf temp-count (count-compatible-tuples tuple ext-definition binding))
      (when (not (zerop temp-count))
	(incf retained)
	(incf sum temp-count)))))

;;; COUNT-COMPATIBLE-TUPLES returns the number of tuples in EXT-DEFINITION
;;; which can extend TUPLE under the constraints of BINDING
(defun count-compatible-tuples (tuple ext-definition binding)
  (let ((sum 0))
    (dolist (cand ext-definition sum)
      (if (compatible-extension tuple cand binding)
	  (incf sum)))))

;;; COMPATIBLE-EXTENSION returns TRUE if the tuples, TUPLE and
;;; EXTENDER are compatible under BINDING.
(defun compatible-extension (tuple extender binding)
  (do* ((old-vars (length tuple))
	(ext-ptr extender (cdr ext-ptr))
	(bind-ptr binding (cdr bind-ptr))
	(current-var (car bind-ptr) (car bind-ptr)))
    ((null ext-ptr) t)

    (if (and (< current-var old-vars)
	     (not (equal (nth current-var tuple) (car ext-ptr))))
	(return nil))))

;;; COUNT-UNEXTENDABLE returns the number of tuples in TUPLES which
;;; have no extension in EXT-DEFINITION compatible under the restrictions
;;; imposed by BINDING
(defun count-unextendable (tuples ext-definition binding)
  (let ((sum 0))
    (dolist (tuple tuples sum)
      (unless (extendablep tuple ext-definition binding)
	  (incf sum)))))

;;; EXTENABLEP returns if there is a tuple in EXT-DEFINITION compatible with
;;; tuple under BINDING.
(defun extendablep (tuple ext-definition binding)
  (dolist (ext ext-definition nil) 
    (if (compatible-extension tuple ext binding)
	(return t))))

;;; SUBTUPLEP returns true if the tuple (PREFIX) is a prefix of TUPLE.
;;; i.e. TUPLE is an extension of prefix.
(defun subtuplep (prefix tuple)
  (do ((pre-ptr prefix (cdr pre-ptr))
       (tup-ptr tuple (cdr tup-ptr)))
    ((null pre-ptr) t)
    (unless (equal (car pre-ptr) (car tup-ptr))
      (return nil))))

;;; EXTEND-TUPLES-WITH-LITERAL returns a list of the extended
;;; tuples from TUPLES which are compatible with LITERAL given the
;;; background information in EXT-DEFINITIONS.
(defun extend-tuples-with-literal (tuples ext-definitions literal)
  (if (eq (car literal) 'not)
      (let* ((pure-literal (second literal))
	     (extuples (getf ext-definitions (car pure-literal)))
	     (binding (cdr pure-literal)))
	(remove-if #'(lambda (x) (extendablep x extuples binding))
		   tuples))
      (let ((exttuples (getf ext-definitions (car literal)))
	    (binding (cdr literal)))
	(mapcan #'(lambda(tuple) (all-extensions tuple binding exttuples))
		tuples))))

;;;---------------------------------------------------------------------------
;;; Operations on bindings.
;;;---------------------------------------------------------------------------

;;; RECURSION-FILTER return a list of those bindings in BINDINGS which are 
;;; "safe" for a recursive antecedent.  NOTE: Currently, the safety is very
;;; weak.  The function only insures that a recursive call contains variables
;;; not contained in the head of the clause, and introduces no new vars.
(defun recursion-filter (new-var bindings)
  (delete-if 
      #'(lambda (binding)
	  (let ((max-var (apply #'max binding)))
	    (or (>= max-var new-var)  ;;; contains brand-new vars
		(< max-var (length (first bindings)))))) ;;; contains only orig
      bindings))

;;; SAME-OLD-VARS returns true if both bindings have all occurrences of
;;; vars numbered less than NEW-VAR in the exact same position.  NOTE:
;;; this guarantees that one of the bindings is a specialization of the
;;; other since relations among new vars can only restrict the set of
;;; tuples covered by a literal.
(defun same-old-vars (binding1 binding2 new-var)
  (do* ((ptr1 binding1 (cdr ptr1))
	(ptr2 binding2 (cdr ptr2))
	(var1 (car ptr1) (car ptr1))
	(var2 (car ptr2) (car ptr2)))
    ((null ptr1) t)

    (if (and (not (eql var1 var2))
	     (or (< var1 new-var)
		 (< var2 new-var)))
	(return nil))))

