;;;; EXPLORER.LISP - an AM-like program.  It is called EXPLORER, rather than AM,
;;;; because it is substantially more limited than AM.  However, it contains much
;;;; of the spirit of AM.  It explores a space of concepts, guided by
;;;; "interestingness" measures and makes conjectures about the concepts
;;;; it explores.  (The design of this program was heavily influenced
;;;; by the PYTHAGORUS program of Tanimoto, described in his "Elements of
;;;; Artificial Intelligence" textbook.)

;;;; Copyright (C) 1988 by Jude William Shavlik and Raymond Joseph Mooney
;;;;       This program may be freely copied, used, or 
;;;;       modified provided that this copyright notice 
;;;;       is included in each copy of this code and parts thereof.

;;;; In EXPLORER, concepts are conjuctive collections of attribute/value pairs.
;;;; EXPLORER has an agenda containing tasks to perform.  Performing these tasks
;;;; may in turn lead to additional tasks being added to the agenda.  The order
;;;; tasks are performed and even the tasks to be performed are guided by
;;;; "interestingness" measures. 

;;;; Initially, the system takes elements from a (pre-defined) list of instances
;;;; and determines if they are members or non-members of the first concept
;;;; currently being explored.  Depending on the results, more examples may be
;;;; explored, conjectures may be made, and new concepts introduced.
;;;; The system keeps exploring the concept space until it runs out of tasks
;;;; to perform or until its time allocation expires.  At the end, the concepts
;;;; deemed to be the most interesting are reported.

;;;; New concepts are introduced by specializing (implemented) or 
;;;; generalizing (unimplemented) the current concept. 

;;;; Sample data for the system can be found in the file DISCOVERY-DATA
;;;; and POLYGON-DATA.

;;;; The following fields are maintained for each concept explored.
;;;    definition        - a conjunction collection of attribute/value pairs
;;;    generalization    - the chosen generalization of this concept
;;;    specializations   - specializations of this concept being explored
;;;    interestingness   - the interestingness value for this concept
;;;    examples-not-explored - examples still to be explored
;;;    unused-features   - features in the domain not in the definition
;;;    pos-examples      - examples of this concept
;;;    neg-examples      - counter-examples of this concept
;;;    conjectures       - conjectures made about this concept

;;;; --------------------------------------------------------------------------------------
;;;;                       Global Variables
;;;; --------------------------------------------------------------------------------------

(defvar *new-examples*       10 "Number of new examples to test when exploring a concept.")
(defvar *agenda*            NIL "Tasks to perform, ordered by interestingness.")
(defvar *max-agenda-size*    20 "Maximum number of tasks on the agenda.")
(defvar *trace-explorer?*   NIL "Report details of the exploration of the concept space?")
(defvar *report-agenda?*    NIL "Report the agenda whenever it is updated?")
(defvar *all-examples*      NIL "List of all instances.")
(defvar *domains*           NIL "User-supplied lists of features and their possible values.")
(defvar *all-features*      NIL "List of features - set by run-explorer.")
(defvar *concepts-explored* NIL "Concepts already encountered - used to improve efficency.")
(defvar *concepts-to-see*     5 "Periodically, report the best N concepts.")
(defvar *concept-hierarchy* NIL "Pointer into the concept hierarchy being explored.")
(defvar *max-run-time*      600 "Maximum run time (in seconds).")
(defvar *inter-report-time* 120 "Every this many seconds, report the most interesting concepts.")


;;;; --------------------------------------------------------------------------------------
;;;;                      The Main Functions
;;;; --------------------------------------------------------------------------------------

(defun run-explorer (examples &optional initial-concept-defn  &aux initial-concept)
  ;;; Start exploring the space defined by *domains* at the concept
  ;;; with this definition.
   (setf *all-examples* examples)
   (setf *all-features* (mapcar #'(lambda (x) (first x)) *domains*))
   (if (check-examples *all-examples*) 
       (format t "~%~%Examples verified.")
       (error "Examples must be edited."))
   (setf *concepts-explored* nil)
   (gensym 0)
   (setf initial-concept (gensym "concept-"))
   (initialize-concept initial-concept initial-concept-defn)
   (setf *concept-hierarchy* initial-concept) ; Need a pointer into the concept hierarchy.
   (setf *agenda* nil)
   ;; Agenda items are of the form (<interestingness value> <task>)
   (add-to-agenda `(1 (generate-concept-examples (quote , initial-concept))))
   (format t "~%~%Beginning exploration ...~%~%")
   (explore-concept-space)
   (report-results)
   'done)

(defun explore-concept-space (&aux (start-time (get-internal-run-time)) 
                                   (last-report-time start-time))
  ;;; Take the first item off of the agenda, process it, and repeat.
  ;;; Processing my add more items to the agenda.
  ;;; Continue until out of time or there are no more items on the agenda.
   (loop (cond (*agenda* (eval (second (pop *agenda*))))
               (t (format t "~%~%Out of tasks to perform.~%")
                  (return)))
         (when (> (- (get-internal-run-time) start-time) 
                  (* *max-run-time* internal-time-units-per-second))
           (format t "~%~%Time Limit Exceeded.~%")
           (return))
         (when (> (- (get-internal-run-time) last-report-time) 
                  (* *inter-report-time* internal-time-units-per-second)) 
           (report-results T)
           (setf last-report-time (get-internal-run-time)))))

(defun generate-concept-examples (concept &aux (examples-left 
                                                 (get concept 'examples-not-explored)))
  ;; Generate and test new examples, then update this concept.
   (when *trace-explorer?* 
     (format t "Testing examples on the concept~% ")
     (print-out-defn (get concept 'definition) T))
   (dotimes (i *new-examples*) 
     (if examples-left 
      (let* ((example (pop examples-left))
             (answer (apply-defn (get concept 'definition) (eval example))))
        (when *trace-explorer?* 
            (format t "   ~A ~15,3T~:[is not~;is~] an example.~%" example answer))
        (if answer
           (push example (get concept 'pos-examples))
           (push example (get concept 'neg-examples))))))
   (when *trace-explorer?* (format t "~%"))
   (setf (get concept 'examples-not-explored) examples-left) ; Save examples unexplored.
   (update-concept concept))

(defun update-concept (concept)
  ;;; Update the interestingness of this concept and create new tasks for specializing,
  ;;; generating more examples, or making conjectures.
   (setf (get concept 'interestingness) (determine-concept-interestingness concept))

   (add-to-agenda `(, (interest-of-conjecturing concept)
                    (generate-conjectures (quote , concept))))
   (add-to-agenda `(, (interest-of-generating-examples concept)
                    (generate-concept-examples (quote , concept))))
   (add-to-agenda `(, (interest-of-generalizing-concept concept)
                    (generalize-concept (quote , concept))))
   (add-to-agenda `(, (interest-of-specializing-concept concept)
                    (specialize-concept (quote , concept)))))

(defun generate-conjectures (concept)
  ;;; Generate conjectures about this concept.
  ;;; TO BE WRITTEN
  nil)

(defun specialize-concept (concept &aux (unused-feature (pop (get concept 'unused-features))))
  ;;; Specialize this concept.  Do this by choosing an unused feature and then building
  ;;; new concepts for each possible value of this feature.  Each new concept's
  ;;; definition is determined by the definition of the current concept, plus
  ;;; a specific value of the previously unconstrained feature.
  (dolist (value (determine-feature-values unused-feature))
    (let ((new-concept (gensym "concept-")))
      ;;  See if this is a concept has not already been investigated.  If it has,
      ;;  initialize-concept will return nil.
      (when (initialize-concept new-concept 
				(cons (list unused-feature value) (get concept 'definition)))
	(setf (get new-concept 'generalization) concept)
	(push new-concept (get concept 'specializations))
	(when *trace-explorer?*
	  (format t "Introducing the concept~% ")
	  (print-out-defn (get new-concept 'definition) T T))
	(update-concept new-concept))))
  ;; Consider specializing this concept again using a different feature
  (add-to-agenda `(, (interest-of-specializing-concept concept)
		   (specialize-concept (quote , concept)))))

(defun generalize-concept (concept)
  ;;; Introduce a generalization of this concept.
  ;;; Note, for simplicity, a concept has only one "parent" concept, although there
  ;;; often will be more than one generalization possible.  However, the generalizations
  ;;; not produced here may arise later through additional generalizations and specializations.
   (error "TO BE WRITTEN"))

(defun add-to-agenda (task &aux (task-worth (first task)))
  ;;; If this task has a non-zero value, insert it into the agenda, pruning
  ;;; the agenda if it gets too large.
    (When (> task-worth 0)
      ;; First remove any existing task which is the same since it may now have an
      ;; out-dated interestingness value.
      (setf *agenda* (delete task *agenda* :test #'(lambda (task1 task2)
						     (equal (second task1)
							    (second task2)))))
      (setf *agenda* (subsequence (merge-into-agenda task-worth task *agenda*) 
                                  0 *max-agenda-size*))
      (if *report-agenda?* (report-agenda))))

(defun merge-into-agenda (task-worth task agenda)
  ;;; Locate this task into its proper position in the agenda which is a list
  ;;; of tasks of the form (interestingness task-form) sorted by interestingness
   (cond ((null agenda) (list task))
        ((< task-worth (first (first agenda))) ; Too low of value to fit in here.
           (cons (first agenda) (merge-into-agenda task-worth task (rest agenda))))
        (t (cons task agenda))))

(defun apply-defn (concept-definition example)
  ;;; Determine if this example satisfies this concept definition.
  ;;; This means every feature of the concept must be true of the example.
   (every #'(lambda (feature-and-value) (member feature-and-value example :test 'equal)) 
          concept-definition))

(defun determine-concept-interestingness (concept &aux (hit-ratio (calculate-hit-ratio concept)))
  ;;; To determine how interesting a concept is, look at the number of conjectures
  ;;; made about it and also consider the fraction of examples investigated that are positive.
  ;;; If no examples have been investigated, inherit the interestingness of the parent
  ;;; (and add 25 because of the interestingness of being unexplored).
  (cond ((null (get concept 'definition)) 50) ; the always true concept has constant interest
	((numberp hit-ratio) 
	 (+ (* 400 (- hit-ratio (* hit-ratio hit-ratio))) (* 100 (length (get concept 'conjectures)))))
	(t (+  25 (get-parent-interestingness (get concept 'generalization))))))

(defun interest-of-generating-examples (concept)
  ;;; Estimate the interest in investigating more examples of this concept.
  ;;; If there are more examples to explore then use interestingness of concept
   (if (get concept 'examples-not-explored)
       (get concept 'interestingness)
      -100))					; All examples explored.

(defun interest-of-conjecturing (concept)
  ;;; Determine the interestingness of conjecturing about this concept.
  ;;; Unless there are no positive examples collected yet to base a conjecture on,
  ;;; make the same as concept interestingness
   (if (get concept 'pos-examples)
       (get concept 'interestingness)
       -100))

(defun interest-of-specializing-concept (concept &aux (hit-ratio (calculate-hit-ratio concept)))
  ;;; Determine the interest in specializing this example.
   (if (and (get concept 'unused-features)   ; Must have some features to add.
            (numberp hit-ratio))
      (* 50 hit-ratio) ; It is more desirable to specialize if most examples are positive,
      -100))           ; since specialization may eliminate some of the positive examples.

(defun interest-of-generalizing-concept (concept)
 ;;; Determine the interest in generalizing this example.
 ;;; TO BE WRITTEN
-100)

;;;; --------------------------------------------------------------------------------------
;;;;                               UTILITY FUNCTIONS
;;;; --------------------------------------------------------------------------------------

(defun calculate-hit-ratio (concept &aux (number-pos (length (get concept 'pos-examples)))
                                         (number-neg (length (get concept 'neg-examples))))
  ;;; Determine what fraction of the examples tested on this concept have been positive.
   (if (and (= number-pos 0) (= number-neg 0))
      'undefined
      (/ number-pos (+ number-neg number-pos))))

(defun get-parent-interestingness (parent-concept)
  ;;; If this is a concept, get its interestingness.  Otherwise return
  ;;; the interestingness of the "parent" of the root concept.
  ;;; This simplifies some other functions when the concept is the root concept.
   (if parent-concept (get parent-concept 'interestingness) 50))

(defun report-results (&optional intermediate-report?)
  ;;; Report the N most interesting concepts.
   (format t "~%~%The ~:[Final~;Current~] ~@(~R~) Most Interesting Concepts~%~%" 
      intermediate-report? *concepts-to-see*)
   (report-interesting-concepts
     (subsequence (sort (collect-concepts (get-most-general-concept *concept-hierarchy*))
                     #'> :key #'(lambda (x) (or (get x 'interestingness) 0)))
               0 *concepts-to-see*))
   (format t "~%"))

(defun collect-concepts (concept)
  ;;; Collect this concept and all of the concepts under it.
   (cons concept (mapcan #'collect-concepts (get concept 'specializations))))

(defun report-interesting-concepts (interesting-concepts)
  ;;; Report some information about these concepts.
   (mapc #'(lambda (concept) 
              (format t "~8,3F ~A~%~5T" (get concept 'interestingness) concept)
              (print-out-defn (get concept 'definition) T)
              (mapc #'(lambda (property) (report-concept-property concept property))
                    '(pos-examples neg-examples conjectures)))
         interesting-concepts))

(defun print-out-defn (definition &optional linefeed-when-done? second-linefeed?)
   (if definition
      (mapc #'(lambda (term) (format t " ~A=~A" (first term) (second term))) definition)
      (format t " <NO-CONSTRAINTS>"))
   (if linefeed-when-done? (format t "~%"))
   (if second-linefeed? (format t "~%")))

(defun report-concept-property (concept property)
    (when (get concept property)
       (format t "~7@T~@(~A~):" property)
       (mapc #'(lambda (value) (format t " ~A" value)) (get concept property)) 
       (format t "~%")))

(defun get-most-general-concept (concept)
  ;;; Climb the concept hierarchy to the top.
   (if (get concept 'generalization) 
     (get-most-general-concept (get concept 'generalization))
     concept))

(defun initialize-concept (concept concept-defn)
  ;; Create and initialize this concept.
   (setf (get concept 'definition) concept-defn)
   (unless (member concept *concepts-explored* :test #'equivalent-defns) 
    ;; Unless concept already created, create it.
     (push concept *concepts-explored*)
     (setf (get concept 'examples-not-explored) (mix-up *all-examples*))
     (setf (get concept 'unused-features) ; Collect features not in the concept defn. 
       (remove-if #'(lambda (feature) (assoc feature concept-defn)) *all-features*))
     (setf (get concept 'generalization)  nil)
     (setf (get concept 'specializations) nil)
     (setf (get concept 'pos-examples)    nil)
     (setf (get concept 'neg-examples)    nil)
     (setf (get concept 'conjectures)     nil)
     (setf (get concept 'interestingness) (determine-concept-interestingness concept))
     t));indicate that a new concept has been created

(defun check-examples (examples &aux (no-error? t))
  ;;; Check that the examples and the information in *domains* corresponds.
   (dolist (example examples)
     (dolist (feature *all-features*)
       (let ((value (second (assoc feature (eval example)))))
         (cond ((null value) 
                  (setf no-error? nil)
                  (format t "~%Attribute ~A not present in example ~A~%" feature example))
               ((member value (determine-feature-values feature)) nil)
               (t (setf no-error? nil)
                  (format t "~%~A in example ~A~% is not in the domain of ~A~%" 
                            value example feature))))))
   no-error?)

(defun report-agenda ()
  ;;; Report the items in the agenda (for debugging purposes).
   (format t "Agenda: ~A~%" (first *agenda*))
   (mapc #'(lambda (item) (format t"        ~A~%" item)) (rest *agenda*)))

(defun determine-feature-values (feature)
  ;;; Determine the possible values of this feature.
   (rest (assoc feature *domains*)))

(defun subsequence (sequence start end)
  ;;; Return a portion of this sequence, from start (inclusive) to end (exclusive).
   (if (or (null end) (>= end (length sequence)))
      sequence
      (subseq sequence start end)))

(defun mix-up (list)
  ;;; Randomize the elements in this list (non-destructively).
   (sort (copy-list list) #'(lambda (a b) (> (random 1000) 499))))

(defun equivalent-defns (concept1 concept2 &aux (defn1 (get concept1 'definition))
			  (defn2 (get concept2 'definition)))
  ;;; See if these are equivalent definitions.  They are if every term in
  ;;; defn 1 is in defn 2 and vice-versa.  Hence (A B C) and (B C A) are equivalent.
   (and (every #'(lambda (term) (member term defn2 :test #'equal)) defn1)
        (every #'(lambda (term) (member term defn1 :test #'equal)) defn2)))


