;  AGENT
;  Routines for having an agent perform in a tileworld

(in-package 'xtile)

;;;; Experiment control functions are now done in the experiment module.
;;;; Therefore this stuff is commented out. -mpf

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  DO A TILEWORLD EXPERIMENT                                           ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Initialize a world and an agent, and then play a tileworld game.
; Returns: -- score obtained.
;(defun tw-exp (&optional seed)
;   (setf (tw-rand-seed w0) seed)
;   (if *display-on* (twinitdisp))
;   (tw-init w0)
;   (agent-init a0 w0)
;   (gc)
;   (setf w0 (play-tw-game w0 a0 *game-length*))
;   (if *display-on* (twclosedisp))
;   (tw-score w0))

;(defun tw-redo ()
;   (setf (tw-rand-seed w0) (tw-rand-seed-was w0))
;   (if *display-on* (twinitdisp))
;   (tw-init w0)
;   (agent-init a0 w0)
;   (setf w0 (play-tw-game w0 a0 *game-length*))
;   (if *display-on* (twclosedisp))
;   (tw-score w0))

(defun agent-init (mind world)
  (setf (agent-options mind) 
	(create-new-options (tw-hole-list world)))
  (setf (agent-intentions mind) nil)
  (setf (agent-removed-intentions mind) nil))
  
;Play a tileworld game:  Simulate concurrent acting and thinking by
;first performing one think step, keeping track of how long you take,
;and then acting for the same period of time, allowing the world to
;change then as well.  Because of the concurrency, the act cycle makes 
;use of the initial state of mind:  at the end of both acting and thinking
;the resulting changes to the agent's mental state are combined.  Note
;that the world only changes during the act cycle, so it doesn't need
;to be copied and then updated to simulate concurrency.
;Mutates:  --arg1 and arg2 (world and mind)
;Returns:  --world that results from playing the game

;;;;; Replaced by (xtw-experiment-step) [in the exper module],
;;;;;  which is the work-function for the event-loop. -mpf

;(defun play-tw-game (world mind *game-length*)
;  (with-open-file
;   (*history*  *historyfile* :direction :output
;	      :if-exists :rename)
;  (let ( think-time cycles-to-act mind1 mind2)
;    (keephist  "HOLES INITIALLY AROUND:"  'write-holes (tw-hole-list world))
;    (keephist "INITIAL MENTAL STATE:" 'write-mind  mind)
;    (setf zark 0)
;    (do ( (elapsed 0 (+ elapsed think-time)))
;	( (>= elapsed *game-length*) world)
;	(setf mind1 (agent-copy mind))
;	(setf mind2 (agent-copy mind))
;	(setf think-time (think world mind1))
;	(keephist "THINK TIME USED:"  'write-plain think-time)
;	(format t "~s" zark) (setf zark (1+ zark))
;	(setf cycles-to-act (- (floor (/ (min (+ elapsed think-time)
;					      *game-length*)
;				         (agent-act-time mind)))
;			        (floor (/ elapsed
;				          (agent-act-time mind)))))
; 	(multiple-value-setq (world mind2)
; 	    		     (act world mind2 cycles-to-act))
;	(merge-minds mind mind1 mind2))
;    world))
;)


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; THINK CYCLE                                                          ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;Perform one think step.  The strategy for determining what this step
;will be depends upon the agent's "think-strategy", chosen at initialization
;time.  Keep track of the amount of time (in computer msec's) used.
;Mutates:  arg2 (mind)
;Returns:  amount of time used

(defun think (world mind)
  (let (start-time stop-time work-done think-time)
    (setf start-time (get-internal-run-time))
    (setf work-done (funcall (agent-think-strategy mind) world mind))
    (setf stop-time (get-internal-run-time))
    (setf think-time (if work-done
			 (/ (- stop-time start-time) 1000)  ; milliseconds used
		         (agent-wait-time mind))) ; else pretend it took longer
    think-time))


;Engage in deliberation if there are any available options.  Otherwise do
;some planning.
;Returns: --  t if something useful was done; nil if nothing was.
;         --  mutates mind (arg1)

(defun deliberate-first (world mind)
  (cond ( (or (agent-options mind)
	      (and (agent-intentions mind)
	            (not (current-int (agent-intentions mind)))))
         (deliberate world mind)  t)
	( (something-to-plan mind) (expand-plans mind world) t)
	(t nil)))


; something to plan if there's a current intention and it's not fully planned
(defun something-to-plan (mind)
  (let ((curint (current-int (agent-intentions mind))))
    (if curint
	(or (not (act-subacts curint))
            (eq (act-type (car (last (act-subacts curint)))) 'rest)))))




;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; THINK CYCLE---DELIBERATION                                           ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defun deliberate (world mind)
  (let ( (curint (current-int (agent-intentions mind))))
    (funcall (agent-eval-strat mind) (filter mind curint) curint mind world)
    (setf (agent-options mind) nil)
    (keephist "MIND AFTER DELIBERATE:  " 'write-mind mind)))

(defun filter (mind curint)
  (let ( (opts (make-opt-list (agent-options mind)))
	 (thresh (agent-threshhold mind)))
    (filter-each opts thresh mind curint)))

(defun make-opt-list (options)
  (if (null options)
      nil
      (append (option-set-alt-acts (first options))
	       (make-opt-list (rest options)))))

(defun filter-each (opts thresh mind curint)
  (cond ( (null opts) nil)
	( (override (car opts) curint thresh)
	  (cons (car opts) (filter-each (cdr opts) thresh mind curint)))
	(t (make-opts-later-ints opts mind)
	   (filter-each (cdr opts) thresh mind curint))))

(defun override (option curint thresh)
  (if curint
    (>= (- (binding 'score (act-parms option))
	   (binding 'score (act-parms curint)))
        thresh)
    t))

(defun simple-evaluate (options curint mind unused-world)
  (declare (ignore unused-world))
  (if curint
      (let* ( (maxopt (findmax options))
	      (restopts (allbut options maxopt))
	     (maxoptval (if maxopt (binding 'score (act-parms maxopt))
			    0))
	     (curintval (if curint (binding 'score (act-parms curint))
			    0)))
	(if (>= curintval maxoptval)
	    (make-opts-later-ints options mind)
	    (and (make-current-int maxopt mind)
		 (make-opts-later-ints restopts mind)
		 (demote-to-later-int curint)
		 (kill-all-subplans mind))))
    (let* ( (maxopt (findmax options))
	    (restopts (allbut options maxopt))
	   (maxoptval (if maxopt (binding 'score (act-parms maxopt))
			  0))
	   (maxint (findmax (agent-intentions mind)))
	   (maxintval (if maxint (binding 'score (act-parms maxint))
			  0)))
      (if (> maxoptval maxintval)
	  (and (make-current-int maxopt mind)
	       (make-opts-later-ints restopts mind))
	  (and (promote-to-current-int maxint)
	       (make-opts-later-ints options mind))))))

(defun findmax (acts)
  (let ( (bestval -1) (restacts acts) thisval new best)
    (do () ((null restacts))
	(setf new (first restacts))
	(setf thisval (binding 'score (act-parms new)))
	(when (> thisval bestval) 
	      (setf best new)
	      (setf bestval thisval))
	(setf restacts (rest restacts)))
  best))

(defun seu-evaluate (options curint mind world)
  (if curint
      (let* ( (maxpr (findmaxseu options world))
	      (maxopt (cadr maxpr))
	      (restopts (allbut options maxopt))
	      (maxoptval (if maxopt (car maxpr) 0))
	      (curintval (seu curint world)))
	(if (>= curintval maxoptval)
	    (make-opts-later-ints options mind)
	    (and (make-current-int maxopt mind)
		 (make-opts-later-ints restopts mind)
		 (demote-to-later-int curint)
		 (kill-all-subplans mind))))
    (let* ( (maxpr (findmaxseu options world))
	    (maxopt (cadr maxpr))
	    (restopts (allbut options maxopt))
	    (maxoptval (if maxopt (car maxpr) 0))
	    (maxintpr (findmaxseu (agent-intentions mind) world))
	    (maxint (cadr maxintpr))
	    (maxintval (if maxint (car maxintpr) 0)))
      (if (> maxoptval maxintval)
	  (and (make-current-int maxopt mind)
	       (make-opts-later-ints restopts mind))
	  (and (when (and maxint (> maxintval 0))
		     (promote-to-current-int maxint))
	       (make-opts-later-ints options mind))))))

(defun findmaxseu (acts world)
  (let ( (bestval -1) (restacts acts) thisval new best)
    (do () ((null restacts))
	(setf new (first restacts))
	(setf thisval (seu new world))
	(when (> thisval bestval) 
	      (setf best new)
	      (setf bestval thisval))
	(setf restacts (rest restacts)))
    (list bestval best)))


(defun seu (act world)
  (let* ( seuval
	  (score (binding 'score (act-parms act)))
	  (holeid (binding 'hole (act-parms act)))
	  (hole (findhole holeid (tw-hole-list world)))
   	  (size (length (obj-cells hole)))
	  (time (obj-timeout hole))
	  (distance1 (finddist hole (car (tw-agent-list world))))
	  (distance (if (> (abs distance1) 0) distance1 1))
	  (tileavail (findavail hole world size)))
;(setf **score score)
;(setf **size size)
;(setf **time time)
;(setf **distance distance)
;(setf **tileavail tileavail)
(setf seuval 
;      (if (<= time 0)
;	  0                             ;no more time:  don't even consider
;	(/ (* score (/ time 10000))
	(/ score
	   (+ distance (* 2 tileavail))))
 (keephist "SEU:  " 'write-seu (list size time distance tileavail score seuval))
 seuval))
 

(defun findhole (hole holelist)
  (cond ( (eq hole (obj-name (car holelist))) (car holelist))
	( t (findhole hole (cdr holelist)))))

(defun finddist (obj1 obj2)
  (let ( (row1 (obj-row obj1))
	 (col1 (obj-col obj1))
	 (row2 (obj-row obj2))
	 (col2 (obj-col obj2)))
    (+  (abs (- row1 row2))
        (abs (- col1 col2)))))

(defun findavail (hole world size)
  (let (next-tile p1 p2 p3 p4 p5 p6 (total 0))
    (multiple-value-setq (next-tile p1 p2 p3 p4 p5 p6)
			 (tw-closest-tile hole world))
    (setf total (finddist hole next-tile))
    (do ( (i 1 (+ i 1)))
	((= i size))
	(multiple-value-setq (next-tile p1 p2 p3 p4 p5 p6)
			     (next-closest-tile p1 p2 p3 p4 p5 p6))
	(setf total (+ total (finddist hole next-tile))))
    total))

(defun promote-to-current-int (int)
  (setf (act-time int) 'now)
  (setf (act-status1 int) 'intention)
  (setf (act-status2 int) 'to-be-planned))

(defun demote-to-later-int (int)
  (setf (act-time int) 'later)
  (setf (act-status1 int) 'intention))

(defun make-current-int (opt mind)
  (setf (act-time opt) 'now)
  (setf (act-status1 opt) 'intention)
  (setf (act-status2 opt) 'to-be-planned)
  (setf (agent-intentions mind) (cons opt (agent-intentions mind))))

(defun make-opts-later-ints (opts mind)
  (mapcar #'(lambda (x) 
	      (setf (act-time x) 'later)
	      (setf (act-status1 x) 'intention))
	  opts)
  (setf (agent-intentions mind) (append (agent-intentions mind) opts)))

(defun allbut (list elt)
  (cond ( (null elt) list)
	( (eq (first list) elt) (rest list))
	(t (cons (first list) (allbut (rest list) elt)))))
	
; Kill all subplans (our action failed or we changed goals).

(defun kill-all-subplans (mind)
    (let (flat)
	(dolist (i (agent-intentions mind))
	    (when (act-subacts i)
		(dolist (subi (act-subacts i))
		    (dolist (subsubi (act-subacts subi))
		       (setf flat (cons subsubi flat)))
		    (setf flat (append flat (list subi))))
		(setf (act-subacts i) nil)))
	(dolist (i flat)
	    (setf (act-failed i) t))
	(setf (agent-removed-intentions mind) 
	      (append (agent-removed-intentions mind) flat))
	(setf (agent-intentions mind) 
	      (remove-intentions (agent-intentions mind) flat))))



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; DELIBERATION -- PLAN                                                 ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


(defun expand-plans (mind world)
  (let ( (selected (select-int (agent-intentions mind))))
    (when selected
	  (if (and (eq (act-type selected) 'fill)
    		   (binding 'hole (act-parms selected))
		   (or (null (act-subacts selected))
		       (eq (act-type (first (last (act-subacts selected)))) 'rest)))
	      (expand-fill selected world)))
    (keephist "MIND AFTER PLAN:  " 'write-mind mind)))

    
(defun select-int (ints)
  (cond ((null ints) nil)
	( (and (eq (act-type (first ints)) 'fill) 
	       (eq (act-time (first ints)) 'now))
	  (first ints))    ;NOT REALLY RIGHT:  should plan for later
                           ;intentions when there's nothing to do now
	(t (select-int (cdr ints)))))

(defun move-sequence ()
  (list (make-act :id (make-name 'act)
		  :type 'L
		  :atomic 't)
	(make-act :id (make-name 'act)
		  :type 'R
		  :atomic 't)))

(defun make-null-act ()
  (make-act :id (make-name 'act)
	    :type 'N
	    :atomic 't))




;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ACT/PERCEIVE CYCLE                                                   ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;Act for the same amount of time as you just thought for.  If act
;is successful, remove the satisfied intention from the mental state.
;After attempting an act, note any changes that occurred in the world
;while you were acting.
;Mutates:  arg1 and arg2 (world and mind)

(defun act (world mind cycles)
  (let* ( (cycle-length (agent-act-time mind))
	  old-world next-act moved-ok)
    (dotimes (i cycles mind)
	(setf old-world (tw-copy-world world))
	(setf next-act (select-next-atomic-act mind))
	(keephist "SELECTING ACT:  " 'write-each-act (list next-act))
 	(multiple-value-setq (moved-ok world) 
 	    	(safely-make-move old-world world 
 		       (act-type next-act) cycle-length))
 	(when moved-ok 
	      (setf (agent-intentions mind)
		    (remove-intentions (agent-intentions mind) 
				       (list next-act)))
	      (setf (agent-removed-intentions mind)
		    (cons next-act (agent-removed-intentions mind))))
	(if (not moved-ok)
	    (kill-all-subplans mind))
	(keephist "MIND AFTER ACT:  " 'write-mind mind)
	(keephist "HOLES NOW AROUND:  "  'write-holes (tw-hole-list world))
	(perceive mind old-world world)
	(keephist "MIND AFTER PERCEIVE:  " 'write-mind mind)))
    (values world mind))

(defun safely-make-move (old-world world move delta-t)
     (let ((stepres (tw-step world nil move delta-t))
 	  fellin)
	 (when (null (tw-agent-list world))
	       (keephist "WARNING:  " 'write-plain "Fell into a hole!")
	     (setf fellin t)
	     (setf world (tw-copy-world old-world)))
	 (values (and stepres (not fellin)) world)))



;Perceive environmental changes--both new options (new holes that have
;appears), and lost options and intentions (holes that have disappeared).
; *Future:  generalize to deal with other kinds of environmental changes,
;  e.g., new tiles
; Mutates arg1 (mind)

(defun perceive (mind old-world new-world)
  (let ( (new-holes (set-difference (tw-hole-list new-world)
				      (tw-hole-list old-world)
				      :key 'obj-name))
	 (lost-holes (set-difference (tw-hole-list old-world)
				     (tw-hole-list new-world)
				     :key 'obj-name)))
    (when new-holes (setf (agent-options mind)
			  (append (agent-options mind)
				  (create-new-options new-holes))))
    (when lost-holes (let ((lost-opps (create-lost-opps lost-holes)))
		       (keephist "LOST-OPPS:  "  'write-each-act lost-opps)
		       (setf (agent-options mind)
			     (remove-options (agent-options mind)
					  lost-opps))
		       (setf (agent-intentions mind)
			   (remove-intentions (agent-intentions mind)
					      lost-opps))
		       (setf (agent-removed-intentions mind)
			     (append lost-opps
			         (agent-removed-intentions mind)))))))


;Find the next atomic action to perform.  If there's a currently active
;intention, find the next available atomic subpart.  Otherwise, return
;the null action.
;Returns:  an atomic action to perform

(defun select-next-atomic-act (mind)
  (let ( (current (current-int (agent-intentions mind))))
    (cond ( (and current 
		 (find-base-level current)))
	  ( (make-null-act)))))



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; MERGE RESULTS OF THINK AND ACT CYCLES                                ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


;Since we're simulating concurrent acting and thinking, after one iteration
;of both processes, we've got to merge the resulting mental states.
;Thinking removes options and adds intentions; acting removes intentions
;which may or may not have succeeded (they may contain the 'failed' slot,
;in which case we shouldn't remove an empty parent intention, since we may
;want to try again);
;perceiving, which occurs during the act cycle, removes intentions and options
;and also adds options
;Mutates: --arg1 (mind)

(defun merge-minds (mind think-res act-res)
  (let ( (new-ints-think (acts-copy (agent-intentions think-res)))
	 (old-opts (opts-copy (agent-options mind)))
	 (think-opts (opts-copy (agent-options think-res)))
	 (act-opts (opts-copy (agent-options act-res))))
    (setf (agent-intentions mind)
	  (remove-intentions new-ints-think
		    (agent-removed-intentions act-res)))
    (setf (agent-options mind)
	  (remove-options act-opts
			  (act-list
			    (remove-options old-opts (act-list think-opts)))))
    (keephist "MIND AFTER MERGE:  "  'write-mind mind)))

