 
;;;; Copyright (c) 1990, 1991 by the University of California, Irvine. 
;;;; This program may be freely copied, used, or modified provided that this
;;;; copyright notice is included in each copy of this code.  This program
;;;; may not be sold or incorporated into another product to be sold withou
;;;; written permission from the Regents of the University of California.
;;;; This program was written by Michael Pazzani, Cliff Brunk, Glenn Silverstein
;;;; and Kamal Ali.  

(in-package :user)

;;;___________________________________________________________________________
;;; FIND-MAXIMUM-LITERAL
;;;
;;; returns the predicate variabilization of pred with the maximum information gain
;;; works for intension and extensional pred
;;; if return-all-equal-best-varzns?, returns a list of equal best variabilizations
;;;
;;; returns 4 values 
;;; 1. predicate-with-maximum-gain       - r-struct structure (or a list of equal best r-structs)
;;; 2. variabilization-with-maximum-gain - list of variables (?j old) (?-j new) (or list of lists of variables)
;;; 3. max-negated?                      - t if negation of relation has maximum gain (of list negations)
;;; 4. max-gain                          - the amount of information gain
;;;
;;; returns :fail if no variabilization has gain greater than 0, or
;;;         nil   if all variablizations with non zero require too many bits to encode
;;;
;;;  Note if pruning III occurs this is indicated by the special variable
;;;       *covered-all-pos-tuples* (this variable should only be set by the
;;;       macro check-pruning-III-and-prune) to facilitate the proper control flow.

(defun find-maximum-literal
       (current-state-value			;passed to info-gain
        predicate-being-learned			;used to detect recursion
        variables				;list of old variables
        variable-types				;types of old variables
        maximum-new-vars			;maximum number of new variables allowed
        pos-tuples				;positive tuples
        neg-tuples				;negative
        original-vars				;used to detect recursion
        use-hash-tables				;used by info-gain
        all-preds				;list of predicates to try constructing literals from
        winners					;don't consider predicates with less than this much gain
	source
	&key
	(var-restrictions nil)
        (pred-restrictions nil)
        (instantiated-cliche nil)
        (position-in-cliche nil)
        (try-cliches nil)
        )
  
  (let ((epsilon 0.001)               ;; when a variabilization achieves within epsilon of its max possible gain prune away specializations of that variabilization
        (covered-all-pos-tuples nil)  ;; when a variabilization covers all remaining tuples for this clause dont look at any more variabilizations for any predicate 
        (lpos (length pos-tuples))
        (lneg (length neg-tuples))
        pred variabilization max-possible-gain gain)
    covered-all-pos-tuples
    
    ;;loop thru all predicates
    (dolist (pair (apply-pred-restrictions all-preds pred-restrictions maximum-new-vars))
      (setq pred (rest pair))
      (cond
       ((not (r-induction pred)) nil)
       ((not (any-variabilizations? (r-type pred) variable-types maximum-new-vars)) nil)
       ((and *stopping-criteria-enabled*
             (predicate-requires-too-many-bits pred)) nil)
       (t    ;; CHECK POSITIVE VARIABLIZATIONS
        (do* ((variabilizations
               (apply-variabilization-restrictions
                pred
                var-restrictions
                (get-variabilizations pred maximum-new-vars variables predicate-being-learned variable-types original-vars)
                variables
                variable-types ; ges added 10/28
                original-vars
                (r-type predicate-being-learned) ; ges added 10/28 - original types 
                instantiated-cliche
                position-in-cliche)
               (rest variabilizations))
              (current-struct (first variabilizations) (first variabilizations)))
             
             ((null variabilizations))		;; exit when no more variabilizations
          
          (setq variabilization
                (variabilization-struct-variabilization current-struct))
          (when (variabilization-struct-look-at-positive? current-struct)
            (multiple-value-setq
              (gain max-possible-gain covered-all-pos-tuples)
              (info-gain pred variabilization nil pos-tuples neg-tuples current-state-value
                         (and use-hash-tables (if  instantiated-cliche (= position-in-cliche 0) t))
                         :instantiated-cliche instantiated-cliche	; to print out something more appropriate
                         :position-in-cliche position-in-cliche
                         :lpos lpos
                         :lneg lneg))

            (unless instantiated-cliche
              (add-to-determinate-rs-and-vars pred variabilization gain lpos lneg))
            
            (when (update-winner? winners *literal-better-function* nil gain pred source
                                  :vars variabilization :negated? nil
                                  :instantiated-cliche instantiated-cliche)
              (update-winner winners *literal-better-function* nil gain pred source
                             :vars variabilization :negated? nil
                             :instantiated-cliche instantiated-cliche))
            
            (setf (variabilization-struct-look-at-negative? current-struct)
                  (and (>= 0 (gain-gain gain))	                      ;; only try negative if positives <= to 0
                       (> 2 (count-if #'new-var? variabilization))))  ;; not more than 1 free var
            
            ;; now see if we can prune some variabilizations out
            ;; 1 stands for type I pruning ... normal quinlan
            ;; 2 stands for type II pruning ... when a varzn's info-gain is within epsilon of
            ;;                                  max-possible gain for that varzn, prune
            ;;                                  specializations of that varzn
            (when (find-if #'new-var? variabilization)
              (if (< max-possible-gain (best-gain-so-far winners))
                (prune variabilization (rest variabilizations) 1)
                (when (< (- max-possible-gain (gain-gain gain)) epsilon)
                  (prune variabilization (rest variabilizations) 2))))
            
            (discard-gain gain)))
        
        ;; CHECK NEGATIVE VARIABLIZATIONS
        ;; even with pruning-III, still need to visit this set of varzns
        
        (look-at-negative-variabilizations
         ;; cliches aren't appropriate with negative variablizations so
         ;; don't generate them when processing cliches (but ok with try
         ;; all conjunctions)
         (when (or *try-all-conjunctions* (not try-cliches))	; note can't try cliches first anymore
           (reverse (get-variabilizations pred maximum-new-vars variables predicate-being-learned variable-types original-vars)))
         pred
         pos-tuples
         neg-tuples
         current-state-value
         winners
         use-hash-tables
         source
         :instantiated-cliche instantiated-cliche
         :position-in-cliche position-in-cliche)
        
        (reset-variabilization-flags (gethash (* 100 maximum-new-vars (length variables)) (r-variabilizations pred))))))
    winners))


;;;___________________________________________________________________________
;;; LOOK-AT-NEGATIVE-VARIABLIZATIONS
;;;
;;; these variabilizations have already been checked for induction, mode etc.
;;; they are guaranteed to have at most one new (unbound) variable
;;; varzn is an abbreviation for variabilization
;;;
;;; returns: 1. new max-gain over all predicates so far for current literal
;;;             ... or a list if return-all.. == t
;;;          2. best variabilization (unchanged if max-gain wasnt topped)
;;;              ...or a list
;;;          3. max-negated?: t if max-gain was topped (by a negative variabilization)
;;;               ... or a list
;;;          4. max of the gain of the incoming value, and any gains made by
;;;                the -ve lits examined for the current predicate
;;;          5. covered-all-pos-tuples - to tell find-max-literal if pruning III is appropriate
;;;          6. coverage by the best varzn (negative or not)

(defun look-at-negative-variabilizations (struct-list ;list of varzn structures
                                          pred
                                          pos-tuples
                                          neg-tuples
                                          current-state-value
					  winners
                                          use-hash-tables
					  source
                                          &key
                                          (instantiated-cliche nil)
                                          (position-in-cliche nil))
  (let ((epsilon 0.001)
        (covered-all-pos-tuples nil)
        (lpos (length pos-tuples))
        (lneg (length neg-tuples))
        gain max-possible-gain current-varzn )
    (do* ((remaining-list struct-list (rest remaining-list))
          (current-struct (first struct-list) (first remaining-list)))
         ((null current-struct))
      (setq current-varzn (variabilization-struct-variabilization current-struct))
      (when (variabilization-struct-look-at-negative? current-struct)
        (multiple-value-setq (gain max-possible-gain covered-all-pos-tuples)
	  (info-gain pred current-varzn t pos-tuples neg-tuples current-state-value
		     (and use-hash-tables (if  instantiated-cliche (= position-in-cliche 0) t))
                     :instantiated-cliche instantiated-cliche
		     :position-in-cliche position-in-cliche
                     :lpos lpos
                     :lneg lneg))
        (when (update-winner? winners *literal-better-function* t gain pred source
                              :vars current-varzn :negated? t
                              :instantiated-cliche instantiated-cliche)
          (update-winner winners *literal-better-function* t gain pred source
                         :vars current-varzn :negated? t
                         :instantiated-cliche instantiated-cliche))
   
        ;; t means negated
	;; this macro may return from the return-pt indicated by the second arg 
        ;; nb there are 2 macros: check-pruning-III and check-pruning-III-and-prune
        ;; if this macro evals to nil, control just falls thru
        
	(check-pruning-III covered-all-pos-tuples look-at-negative-variabilizations winners)
        
	;; otherwise try the other 2 types of pruning ...
        
	(when (find-if #'new-var? current-varzn)
	  (if (< max-possible-gain (best-gain-so-far winners) )	; pruning I
            (prune-negations current-varzn (rest remaining-list) 1)
            (when (< (- max-possible-gain (gain-gain gain)) epsilon)	; pruning II
              (prune-negations current-varzn (rest remaining-list) 2))))
        (discard-gain gain)))
    winners))


;;;___________________________________________________________________________
;;; ANY-VARIABLIZATIONS?
;;;
;;;  determine if any variabilizations are possible

(defun any-variabilizations? (pred-type tuple-type maximum-new-vars)
  (let ((type-mismatches 0)
        types-processed)
    (dolist (type pred-type (<= type-mismatches maximum-new-vars))
      (cond ((not (member type types-processed))
             (setq types-processed (cons type types-processed))
             (setq type-mismatches
                   (+ (max (- (count-if #'(lambda (t2) (type-eq type t2)) pred-type)     ;;; changed eql to type-eq
                              (count-if #'(lambda (t2) (type-eq type t2)) tuple-type))   ;;; changed eql to type-eq
                           0)
                      type-mismatches)))
            (t nil)))))

;;;___________________________________________________________________________
;;; FIND-MAXIMUM-LITERAL-FROM-LIST
;;;
;;; returns the predicate variabilization of pred with the maximum information gain
;;; works for intension and extensional pred
;;; if return-all-equal-best-varzns?, returns a list of equal best variabilizations
;;;
;;; returns  updated winners struct when successful

(defun find-maximum-literal-from-list
       (current-state-value			;; information content of current tuple distribution, passed to info-gain
        pos-tuples				;; positive tuples
        neg-tuples				;; negative tuples
        use-hash-tables				;; used by info-gain
        r-struct.variabilization-list		;; a list of (r-struct . variablization) to evaluate using information gain
        winners					;; structure containing the current winners
        source)                                 ;; source of the (r-struct . variablization) pairs typically :template-induction
  
  (let ((epsilon 0.001)
        (covered-all-pos-tuples nil) 
        (lpos (length pos-tuples))
        (lneg (length neg-tuples))
        r-struct variabilization max-possible-gain gain)

    max-possible-gain epsilon

    (dolist (r-struct.variabilization r-struct.variabilization-list)
      (setq r-struct (first r-struct.variabilization)
            variabilization (rest r-struct.variabilization))
      (cond
       ((not (r-induction r-struct)) nil)
       ((and *stopping-criteria-enabled* (predicate-requires-too-many-bits r-struct)) nil)
       (t    
        ;; CHECK POSITIVE VARIABLIZATIONS
        (multiple-value-setq (gain max-possible-gain covered-all-pos-tuples)
          (info-gain r-struct variabilization nil pos-tuples neg-tuples current-state-value use-hash-tables :lpos lpos :lneg lneg))
        
        (add-to-determinate-rs-and-vars r-struct variabilization gain lpos lneg)
        
        (when (update-winner? winners *literal-better-function* nil gain r-struct source :vars variabilization :negated? nil)
          (update-winner winners *literal-better-function* nil gain r-struct source :vars variabilization :negated? nil))
        
        ;;; (check-pruning-III covered-all-pos-tuples find-maximum-literal-from-list winners) 
        ;;; this seems like it should be here but it isn't since it wasn't in find-maximum-literal
        
        (when (and (>= 0 (gain-gain gain))	                   ;; only try negative if positives <= to 0
                   (> 2 (count-if #'new-var? variabilization)))    ;; not more than 1 free var
          
          ;; CHECK NEGATIVE VARIABLIZATIONS
          
          (multiple-value-setq (gain max-possible-gain covered-all-pos-tuples)
            (info-gain r-struct variabilization t pos-tuples neg-tuples current-state-value use-hash-tables :lpos lpos :lneg lneg))
          
          (when (update-winner? winners *literal-better-function* t gain r-struct source :vars variabilization :negated? t)
            (update-winner winners *literal-better-function* t gain r-struct source :vars variabilization :negated? t))
          
          (check-pruning-III covered-all-pos-tuples find-maximum-literal-from-list winners))
        
        (discard-gain gain)))))
  winners)


;;;___________________________________________________________________________
;;; FIND-LITERAL-EXTENSIONAL
;;;
;;; finds extensional literal with maximum gain-  see find-max for details of params
;;;
;;;  returns 7 values
;;;  1 new-literal             -literal structure with maximum gain
;;;                            -suitble for adding to end of clause
;;;  2 new-vars                -names of newvariables (renamed to be old variables)
;;;  3 new-types               -types of new variables
;;;  4 new-pos-tuples          -next set of positive tuples
;;;  5 new-neg-tuples          -next set of negative tuples
;;;  6 max-gain                -maximum information gain (to compare aginst intensional if necessary)
;;;
;;; returns nil if no literal has positive gain

(defun find-literal-extensional (current-state-value             ;passed to info-gain
                                 predicate-being-learned   ;used to detect recursion
                                 variables
                                 variable-types
                                 maximum-new-vars
                                 pos-tuples
                                 neg-tuples
                                 original-vars             ;used to detect recursion
                                 use-hash-tables           ;used by info-gain
                                 winners              ;previous high for minimum-gain
                                 &key
                                 try-cliches
                                 )
  
  ;;find-maximum-literal does the real work- its passed alist *extensional-preds*
  (find-maximum-literal current-state-value
                        predicate-being-learned
                        variables
                        variable-types
                        maximum-new-vars
                        pos-tuples
                        neg-tuples
                        original-vars
                        use-hash-tables
                        *extensional-preds*
                        winners :extensional
                        :try-cliches try-cliches)
  (mapc #'(lambda (winner)
            (create-winner-literal variables
                                   pos-tuples
                                   neg-tuples
                                   winner :extensional variable-types))
        (winners-new-winners winners))
  (setf (winners-new-winners winners) nil)
  winners)



;;;___________________________________________________________________________
;;; FIND-LITERAL-INTENSIONAL
;;;
;;;  finds intensional literal with maximum gain-  see find-max for details of params
;;;
;;; returns 6 values
;;; 1. new-literal     -literal structure with maximum gain, suitable for adding to end of clause
;;;                    -may be operational conjunction of litearals
;;;                         (if *operationalize-intensional is true) or non-operational
;;; 2. new-vars        -names of newvariables (renamed to be old variables)
;;; 3. new-types       -types of new variables
;;; 4. new-pos-tuples  -next set of positive tuples
;;; 5. new-neg-tuples  -next set of negative tuples
;;; 6. max-gain        -maximum information gain (to compare aginst intensional if necessary)
;;;
;;;  returns nil if no literal has positive gain
;;;
;;;  Note: there is a problem when *stopping-criteria-enabled* and *operationalize-intensional*
;;;        find-literal-intensional may fail even though there is an intensional literal with
;;;        positive gain which could be encoded.  The problem arises because cliff's info-gain
;;;        doesn't account for *operationalize-intensional* when determining the bits to encode
;;;        a literal.  Perhaps operationalize could be more closely intergrated with
;;;        find-maximum-literal when *operationalize-intensional* is true.  Find-maximum-literal
;;;        might return "winning" operational disjunct when *operationalize-intensional* is true.
;;;

(defun find-literal-intensional (current-state-value        ;; passed to info-gain
                                 predicate-being-learned    ;; used to detect recursion
                                 variables
                                 variable-types
                                 maximum-new-vars
                                 pos-tuples
                                 neg-tuples
                                 original-vars              ;; used to detect recursion
                                 use-hash-tables            ;; used by info-gain
                                 winners)
  (find-maximum-literal current-state-value
                        predicate-being-learned
                        variables
                        variable-types
                        maximum-new-vars
                        pos-tuples
                        neg-tuples
                        original-vars
                        use-hash-tables
                        *intensional-preds*
                        winners :intensional)
  (cond ((null *operationalize-intensional*)          ;;just like extensional
         (mapc #'(lambda (winner)
                   (create-winner-literal variables
                                          pos-tuples
                                          neg-tuples
                                          winner
                                          :intensional))
               (winners-new-winners winners))
         (setf (winners-new-winners winners) nil)
         winners)
        
        (t ;;similar to operationalization for ebl (except clause is constructed differently)
         ;;;;;return-operational-literal constructs all the return arguments
         (let ((new-winners (winners-new-winners winners)))
           (mapc #'(lambda (x) (remove-winner x winners)) new-winners)
           (mapc #'(lambda (winner)
                     (op-literal
                      (if (winner-negated? winner)
                        (list 'not (cons (r-name (winner-literal winner))  (winner-vars winner)))
                        (cons (r-name (winner-literal winner)) (winner-vars winner)))
                      variables
                      variable-types
                      pos-tuples
                      neg-tuples
                      current-state-value
                      :intensional
                      use-hash-tables
                      winners))
                 new-winners))
         winners)))

;;;________________________________________________________________
;;; INSERT-DERIVATION-TYPE-OF-EVERY-LITERAL

(defun insert-derivation-type-of-every-literal (clause derivation)
  (do ((L clause (literal-next L)))
      ((null L))
    (setf (derivation-type (literal-derivation L)) derivation)
    (if (literal-negated? L)
      (insert-derivation-type-of-every-literal (literal-negated-literals L) derivation))))

;;;________________________________________________________________
;;; RESET-VARIABLIZATION-FLAGS

(defun reset-variabilization-flags (struct-list)
  (dolist (varzn-struct struct-list struct-list)
    (setf (variabilization-struct-look-at-positive? varzn-struct) t    
          (variabilization-struct-look-at-negative? varzn-struct) nil)))


;;;________________________________________________________________
;;; BEST-GAIN-SO-FAR

(defun best-gain-so-far (winners)
  (if (null (winners-all-winners winners)) 0
      (winner-gain (winners-best-gain-so-far winners))))

