;;; -*- Mode:Common-Lisp; Package:USER; Base:10 -*-

;;; This is a version of the AQ algorithm for learning from examples which uses
;;; beam search to generate bounded stars.  February, 1988.

;;;; Copyright (c) 1988 by Raymond Joseph Mooney. This program may be freely 
;;;; copied, used, or modified provided that this copyright notice is included
;;;; in each copy of this code and parts thereof.

;;;; This version of AQ assumes that events are represented as ordered lists of
;;;; nominal feature values and that complexes are represented similarly with
;;;; ?'s for values which are not constrained. Consequently, its language does
;;;; not include negation, internal disjunction, linear or structural features.
;;;; It has a general LEF function but does not support tolerances.

(sys:compile-load-if "wimberly:mooney.data;data-utilities")
(proclaim '(optimize speed (safety 0)))

(setf (get 'aq 'parameters) '(*max-star* *lef*))

(defparameter *trace-aq* nil  "AQ produces a trace when set to T")
(defparameter *print-with-feature-names* nil  "Print out complexes with feature names")

;; The LEF (Lexicographic Evaluation Functional) determines which complexes
;; are more preferable. A LEF is a list of criteria functions. A criteria
;; function takes two arguements, a complex and the current list of uncovered
;; + events and returns a value, where a lesser value indicates more preferable.
;; The current LEF function first maximizes coverage of + events in order to
;; minimize disjuncts and with in that minimizes the number of selectors.
(defparameter *lef* '(count-coverage count-selectors)) 

(defparameter *max-star* 1 "The beam width which controls the maximum size stars may achieve.")

(defun train-aq (examples)
  (setf examples (make-ordered-examples examples nil))
  (if (pos-neg?)
      (aq examples)
      (multi-aq examples)))

(defun test-aq (example aq-result)
  (setf example (make-ordered-example example nil))
  (if (pos-neg?)
      (if (cover-match aq-result (second example)) (positive-category) (negative-category))
      (multi-aq-test example aq-result)))

(defun aq-concept-complexity (aq-result)
    (if (pos-neg?)
	(cover-complexity aq-result)
	(let ((sum 0))
	  (dolist (alist-elt (rest aq-result) sum)
	    (incf sum (cover-complexity (third alist-elt)))))))
  
(defun aq (examples)
  ;;; AQ takes a list of examples where an example is a list of either + or -
  ;;; (indicating the class) and an event description. It returns a cover
  ;;; (a list of complexes) which covers all of the postive events and none
  ;;; of the negative ones.

  ;; First separate into + and - events and call AQ1
  (let ((pos-instances nil)(neg-instances nil)
	(pos-category (positive-category)) (neg-category (negative-category)))
    (dolist (example examples)
      (cond ((eq (first example) pos-category)
	     (push (second example) pos-instances))
	    ((eq (first example) neg-category)
	     (push (second example) neg-instances))
	    (t (error "Not + or -"))))
    (aq1 pos-instances neg-instances)))


(defun aq1 (pos-instances neg-instances)
  ;;; AQ1 takes a lists of positive and negative events and returns a cover
  ;;; (a list of complexes) which covers all of the postive events and none
  ;;; of the negative ones.

  (let ((star nil)(cover nil)(seed nil)(best-complex nil)(delta 0))
    ;; Use seeds which are not covered by any previous star until there
    ;; are no more such seeds.
    (do ((seed-instances pos-instances))
	((null seed-instances))
      (setf seed (pop seed-instances))     ; pick a seed from the available set
      (setf pos-instances (remove seed pos-instances))
      (trace-print *trace-aq* "~%~%Seed: ~A" seed)
      ;; Generate a bounded star covering this seed but not any of the - events
      (setf star (generate-star seed neg-instances pos-instances))
      ;; Since generate-star returns a list of complexes sorted from best to
      ;; worst according to the LEF, the best is the first element of the star
      (setf best-complex (first star))
      (trace-print *trace-aq* "~%Best complex: ~A" best-complex)
      (push best-complex cover)            ; add the best complex to the cover
      ;; Remove from the set of possible seeds those which are covered by any
      ;; complex in the star
      (setf seed-instances (remove-if #'(lambda (instance)
					 (dolist (complex star)
					   (when (match complex instance)
					     (return t))))
				     seed-instances))
      ;; Remove from the set of positive events those covered by the chosen
      ;; complex
      (setf pos-instances (remove-if #'(lambda (instance)
					 (match best-complex instance))
				     pos-instances)))
    ;; Pick seeds and generate bounded stars until all + events are covered
    (do nil
	((null pos-instances))
      (setf seed (pop pos-instances))
      (trace-print *trace-aq* "~%~%Seed: ~A" seed)
      (setf star (generate-star seed neg-instances pos-instances))
      (setf best-complex (first star))
      (trace-print *trace-aq* "~%Best complex: ~A" best-complex)
      (push best-complex cover)
      (setf pos-instances (remove-if #'(lambda (instance)
					 (match best-complex instance))
				     pos-instances))
      (incf delta))                 ; delta is an estimate of distance from
                                    ; the minimal # of complexes
    (trace-print *trace-aq* "~%~%Cover minimal within ~D complexes" delta)
    cover))


(defun generate-star (seed neg-instances pos-instances)
  ;;; Generate a star which covers seed but not any of the neg-instances
  ;;; If star ever gets larger than *max-star* complexes then trim it the
  ;;; best *max-star* complexes as judged by the LEF. The star is
  ;;; represented as a list of "evaled-complexes" which are cons-cells with a
  ;;; complex in their CAR and a list of LEF values in their CDR.  This prevents
  ;;; unnecessary recalculating LEF values. 

  ;; Initialize the star to an evaled-complex for the most general complex
  (let ((star (compute-lef-values (initialize-star) pos-instances))
	new-star)
    ;; For each negative event specialize each complex in the star which matches
    ;; the negative event just enough so it doesn't cover it.
    (dolist (neg-instance neg-instances)
      ;; if |star|>*max-star* then trim it to the best complexes by sorting it
      ;; according to the LEF and taking the best *max-star* complexes
      (when (> (length star) *max-star*)
	(trace-print *trace-aq* "~%Trimming star to best ~D" *max-star*)
	(setf star (subseq (sort star #'lef-less-than :key #'rest)
			     0 *max-star*)))
      (trace-print *trace-aq* "~%Current star: ~A ~%~%Processing neg event ~A"
		   star neg-instance)
      (setf new-star (update-star star seed neg-instance pos-instances))
      ;; check if star has went empty due to limited search or noise (should never happen otherwise)
      (if (null new-star)
	  (when *trace-aq*
	    (if (equalp seed neg-instance)
		(format t "~%~%Star empty due to noise.~% Not consistent with: ~A" neg-instance)
		(format t "~%~%Star empty due to limited search.~% Not consistent with: ~A" neg-instance)))
	  (setf star new-star)))
    ;; Before returning the star, sort it, trim it if necessary, and return only
    ;; complexes, not evaled complexes
    (setf star (sort star #'lef-less-than :key #'rest))
    (when (> (length star) *max-star*)
      (trace-print *trace-aq* "~%Trimming star to best ~D" *max-star*)
      (setf star (subseq star 0 *max-star*)))
    (trace-print *trace-aq* "~%Final star: ~A" star)
    (mapcar #'(lambda (evaled-complex) (first evaled-complex))	star)))


(defun update-star (star seed neg-instance pos-instances)
  ;;; Specializes complexes in the star so that none match the given instance 
  ;;; for a negative example but each still covers the seed.  Calculates new
  ;;; LEF values for any new complexes to get evaled-complexes

  (setf star (mapcan #'(lambda (evaled-complex) 
			(if (match (first evaled-complex) neg-instance)
			    (compute-lef-values
			      (specializations-against (first evaled-complex)
						       neg-instance seed)
			      pos-instances)
			    (list evaled-complex)))
		    star))
  ;; Remove from the star those complexes which are more specific than some other
  (dolist (evaled-complex1 star)  
    (dolist (evaled-complex2 (rest (member evaled-complex1 star)))
      (cond ((more-general? (first evaled-complex1) (first evaled-complex2))
	     (setf star (remove evaled-complex2 star)))
	    ((or (more-general? (first evaled-complex2) (first evaled-complex1))
		 (equal (first evaled-complex1) (first evaled-complex2)))
	     (setf star (remove evaled-complex1 star))))))
  star)


(defun specializations-against (complex neg-instance seed)
  ;;; Specialize the given complex just enought so it doesn't cover the
  ;;; negative event but still covers the seed. For each feature in
  ;;; complex which is "?" change it to the value in seed to obtain a
  ;;; specialization unless the value in seed and neg-instance are the same.

  (do ((complex-rest complex (rest complex-rest))
       (neg-rest neg-instance (rest neg-rest))
       (complex-bef  nil (append complex-bef (list (first complex-rest))))
       (seed-rest seed (rest seed-rest))
       (specializations nil))
      ((null complex-rest) specializations)
    (if (and (eq (first complex-rest) '?)
	     (not (eq (first neg-rest) (first seed-rest))))
	(push (append complex-bef (list (first seed-rest)) (rest complex-rest))
	      specializations))))


(defun match (generalization instance)
  ;;; Match function for a simple feature vector representation where "?" is a 
  ;;; wildcard
  
  (do ((gen generalization (rest gen))
       (inst instance (rest inst)))
      ((null gen) t)
    (unless (or (eq (first gen) '?)
		(eq (first gen) (first inst)))
      (return nil))))

(defun more-general?  (x y)
  ;;; Returns T iff generalization x is strictly more general than 
  ;;; generalization y for a simple feature vector representation. 
  ;;; For x to be more general than y,  they must match and x must 
  ;;; have a "?" where y has a specific value; however y must never 
  ;;; have a "?" where x has a specific value

  (cond ((or (null x)(null y)) nil)
	((and (eq (first x) '?) (not (eq (first y) '?))
	      (or (equal (rest x)(rest y)) 
		  (more-general? (rest x) (rest y)))) 
	 t)
	((equal (first x) (first y)) 
	 (more-general? (rest x) (rest y)))))


(defun initialize-star ()
  ;;; Initialize G to a set containing the all "?" feature vector

  (list (mapcar #'(lambda (feature) (declare (ignore feature)) '?)  *domains*)))


(defun compute-lef-values (star pos-instances)
  ;;; Calculate LEF values for each complex and return a list of evaled 
  ;;; complexes: (complex lef-value1 lef-value2 ...)

  (mapcar #'(lambda (complex)
	      (cons complex
		    (mapcar #'(lambda (lef-fn)
				(funcall lef-fn complex pos-instances))
			    *lef*)))
	  star))


(defun lef-less-than (value-list1 value-list2)
  ;;; Returns T iff the first list of LEF values is less than the second
  ;;; Interprets values lexicographically

  (or (< (first value-list1) (first value-list2))
      (and (rest value-list1)
	   (equal (first value-list1)(first value-list2))
	   (lef-less-than (rest value-list1)(rest value-list2)))))


(defun count-coverage (complex pos-instances)
  ;;; Counts the number of positive events covered by the complex
  ;;; and returns the negation of this value so less is better

  (let ((match-count 0))
    (dolist (pos-instance pos-instances (- match-count))
      (if (match complex pos-instance)
	  (incf match-count)))))


(defun count-selectors (complex pos-instances)
  ;;; Counts the number of selectors in a complex, assuming less is better

  (declare (ignore pos-instances))
  (let ((selector-count 0))
    (dolist (feature complex selector-count)
      (unless (eq feature '?) (incf selector-count)))))


(defun cover-match (cover instance)
  ;;; If cover covers instance then return t
  (dolist (complex cover)
    (when (match complex instance)(return t))))

(defun cover-complexity (cover)
  (let ((sum 0))
    (dolist (complex cover sum)
      (incf sum (count-selectors complex nil)))))

(defun print-cover (cover)
  ;;; Print cover in a nice format
  (dolist (complex cover)
    (format t "~%~A" (format-complex complex))
    (unless (eq complex (first (last cover)))
      (format t " or"))))


(defun format-complex (complex)
  ;;; Format a complex into a prettier form for output
  ;;; ( (feature-name value) ...) for features with constrained values
  (if *print-with-feature-names*
      (do ((complex-rest complex (rest complex-rest))
	   (feature-rest *feature-names* (rest feature-rest))
	   (formated-complex nil))
	  ((null complex-rest) (nreverse formated-complex))
	(unless (eq (first complex-rest) '?)
	  (push (list (first feature-rest) (first complex-rest)) formated-complex)))
      complex))


;;;; ==========================================================================================
;;;; Multiple category stuff
;;;; ==========================================================================================


(defun multi-aq (examples)
  (dolist (cat *categories*)
    (setf (get cat 'training-examples) nil))
  (dolist (example examples)
    (push (second example) (get (first example) 'training-examples)))
  (let ((cover-alist
	  (mapcar #'(lambda (cat)
		      (list cat (length (get cat 'training-examples))
			    (aq1 (get cat 'training-examples)
				 (mapcan #'(lambda (other-cat) (copy-list (get other-cat 'training-examples)))
					 (remove cat *categories*)))))
		  *categories*)))
    (cons (maximum-category-label cover-alist *categories*)
	  cover-alist)))


(defun multi-aq-test (example cover-alist)
  (let ((class-counts (mapcan #'(lambda (alist-elt)
				  (if (cover-match (third alist-elt) (second example))
				      (list (cons (first alist-elt) (second alist-elt)))))
			      (rest cover-alist))))
    (if class-counts
	(maximum-label class-counts *categories*)
	(first cover-alist))))


(defun maximum-category-label (count-alist &optional tie-breaker-list)
  "Returns the label in count-alist ((label count) ...)
   with the maximum count.  Break ties according to *tie-breaker*"
  (let (max-labels (max-count 0))
    (dolist (count-cons count-alist)
      (cond ((> (second count-cons) max-count)
	     (setf max-count (second count-cons))
	     (setf max-labels (list (car count-cons))))
	    ((= (second count-cons) max-count)
	     (push (first count-cons) max-labels))))
    (if (or (eq *tie-breaker* 'random) (null tie-breaker-list))
	(pick-one max-labels)
	(dolist (item tie-breaker-list)
	  (when (member item max-labels)
	    (return item))))))

