;;; cliche recognition on domain theories only

;;; performs cliche recognition on the domain theories contained in sets of files 
;;; provided (file-list).  Only cliches of length conj-length are recognized

(defun test-cliche-recog-dt (&optional (file-list *cliche-recogn-test-file-list*)
                                     (conj-length 2))
  (dolist (files file-list)
    (test-cliche-recog-on-dt files conj-length)))

    
(defun test-cliche-recog-on-dt (files &optional (conj-length 2))
  (format t "~%recognizing cliches for ~a" (car files))
  (reset-preds)
  (mapc #'load-source-test-file (cdr files))
  (test-domain-theory conj-length))

(defun test-domain-theory (&optional (conj-length 2))
  (pprint (process-domain-theory conj-length)))

(defun process-domain-theory (&optional (conj-length 2))
  (mapcar #'(lambda (pred)
              (list (car pred) (recognize-cliches-from-int-pred (cdr pred) conj-length)))
          *intensional-preds*))

(defun recognize-cliches-from-int-pred (pred conj-length)
  (mapcan #'(lambda (clause) 
              (recognize-cliches-from-clause (convert-clause-struct-to-prolog clause)
                                             conj-length))
          (r-clauses pred)))

;;; for the moment we'll recognize a single cliche per conjunction.

(defun recognize-cliches-from-clause (clause conj-length)
  (let* ((head (car clause))
         (body (cdr clause))
         (head-vars (cdr head))
         (recognized-cliches nil)
         (candidate nil)
         (conj nil)
         (old-vars head-vars))
    ;; reset old vars
    (setq old-vars head-vars)
    (do* ((i 0 (1+ i))
          (remaining-body body (cdr remaining-body))
          (current-literal (car remaining-body) (car remaining-body)))
         ((= i (- (length clause) conj-length)) (nreverse recognized-cliches))
;;;      (format t "~%processing the remaining body ~a" remaining-body)
      (setq conj (subseq remaining-body 0 conj-length))
      (if (member '! conj)
        (setq candidate nil) ; don't learn conjunctions including cut!
        (setq candidate 
              (recognize-cliche-from-conjunction conj head-vars old-vars i clause)))
      (when candidate
        (push (list conj candidate) recognized-cliches))
      ;; tack on vars from current predicate
      (when (listp current-literal)
        (setq old-vars (union (cdr current-literal) old-vars)))
    )))

;;; parameters are as follows:
;;;   conj - conjunction under consideration
;;;   head-vars - variables of the head of the clause
;;;   old-vars - old variables up to the conjunction
;;;   position - position in the clause that the conjunction begins (0 = begins with
;;;     leftmost literal of the body)
;;;   clause - the complete clause including the head

;;;  local vars are:
;;;   new-var-array - array that stores the new variables introduced at each position of 
;;;     the conjunction 
(defun recognize-cliche-from-conjunction (conj head-vars old-vars position clause) 
  (declare (ignore position clause)) ;; - may need these sometime
  (let* ((all-vars old-vars)
         (conj-length (length conj))
         (new-vars nil)
         (new-var-array (make-array conj-length))
         (old-var-array (make-array conj-length))
         (*var-restrictions-recogn* nil))
    (declare (special *var-restrictions-recogn*))
    (lp for i from 0 to (1- conj-length) for literal in conj 
        when (listp literal)
        do
        ;; set up new and old var arrays
        (setq new-vars (set-difference (cdr literal) all-vars :test #'var-eq))
        (setf (aref old-var-array i) all-vars)
        (when new-vars 
          (setq all-vars (append new-vars all-vars)))
        (setf (aref new-var-array i) new-vars)
        ;; recognize variabilization restrictions
        (setq *var-restrictions-recogn* nil)
        (dolist (restriction *all-var-restrictions*)
;;          (format t "~%processing the ~a restriction" (car restriction))
          (recognize-var-restriction restriction conj head-vars old-vars i new-var-array
                                     old-var-array))
        when (listp literal)
        collect 
;        *var-restrictions-recogn*
        (list (list "Pred Restrictions:" (recognize-pred-restr-from-conjunction conj))
              (List "Var Restrictions:" *var-restrictions-recogn*)))))

;;; for the moment only consider the case where source vars are determined - i.e., don't
;;; have to pursue alternatives

(defun recognize-var-restriction (restriction conj head-vars old-vars pos new-var-array
                                              old-var-array)
  (let* ((restriction-struct (cdr restriction))
         (source-pos (var-restriction-source-pos restriction-struct))
         (source-pos-vals (possible-pred-pos-vals source-pos pos)))
    (dolist (source-pos-val source-pos-vals)
      (recognize-var-restriction-for-source-pos source-pos-val restriction conj head-vars old-vars pos 
                                                new-var-array old-var-array))))

(defun possible-pred-pos-vals (source-pos current-pos)
  (cond ((listp source-pos) source-pos)
        ((eql source-pos 'pred-pos)
         (lp for i from 0 to (1- current-pos) collect i))
        (t (list source-pos))))

(defun recognize-var-restriction-for-source-pos (source-pos-val restriction conj head-vars old-vars 
                                                 pos new-var-array old-var-array)
  (declare (special *var-restrictions-recogn*))
  (let* ((restriction-name (car restriction))
         (restriction-struct (cdr restriction))
         (var-att (var-restriction-var-att restriction-struct))
         (var-pos (var-restriction-var-pos restriction-struct))
;         (type (var-restriction-type restriction-struct))
         (include-all-vars? (var-restriction-include-all-vars? restriction-struct))
         (current-vars (cdr (nth pos conj)))
         (source-vars 
          (cond ((eql source-pos-val 'current-pos)
                 (apply-var-att current-vars var-att (aref new-var-array pos)))
                ((numberp source-pos-val)
                 (apply-var-att (cdr (nth source-pos-val conj)) var-att 
                                (aref new-var-array source-pos-val)))
                ((eql var-att 'new)
                 (warn "cannot use new as var-att with ~a as source pos" source-pos-val)
                 nil)
                (t 
                 (case source-pos-val
                   (head head-vars)
                   (old old-vars)
                   (t (warn "unimplemented source-pos in var-restriction ~a" source-pos-val)
                      nil)))))
         (ok-vars (intersection source-vars current-vars :test #'var-eq))
         (var-pos-val
          (if (and var-pos ok-vars (null (cdr ok-vars)) (null include-all-vars?))
            (position (car ok-vars) current-vars :test #'var-eq)
            nil))
;;; types could be a little difficult - we wont worry about them for now  
         (type-val nil))
    (cond ((null ok-vars) nil)
          (include-all-vars?
           (process-include-all-vars-recogn ok-vars source-vars source-pos-val current-vars
                                            restriction-name))
          (t 
           (push 
            (create-recog-var-restriction restriction-name source-pos-val var-pos-val type-val)
            *var-restrictions-recogn*)))))

(defun apply-var-att (vars att new-vars)
  (if (eql att 'new) new-vars vars))
      

;;; for now we'll just hand create these - should be a more general way of doing it - 
(defun create-recog-var-restriction (restriction-name source-pos-val var-pos-val type-val
                                     &optional include-all-vars? unordered? num-subst subst-opt?)
  (declare (ignore include-all-vars?)) ; may need this some day
  (cons restriction-name 
        (case restriction-name
          (include-new-var (list source-pos-val type-val var-pos-val))
          (introduces-new-var (list type-val var-pos-val))
          (include-old-var (list source-pos-val var-pos-val type-val))
          (use-same-vars (list source-pos-val num-subst unordered? subst-opt?))
          (t (warn "~a is an unimplemented restriction" restriction-name) 
             nil))))

;;; figure out if you want to use the values on the structure for unordered?, etc.
(defun process-include-all-vars-recogn (ok-vars source-vars source-pos-val current-vars 
                                                restriction-name)
  (declare (special *var-restrictions-recogn*))
  (let* ((ordered-missmatches (length (var-missmatches source-vars current-vars nil)))
         (unordered-missmatches (length (var-missmatches source-vars current-vars t)))
         (unordered? (< unordered-missmatches ordered-missmatches))
         (num-subst (min unordered-missmatches ordered-missmatches)))
    (if (and (> (length ok-vars) 1) 
             (<= num-subst (floor (/ (length source-vars) 2)))) ; at least 50% match
      (push 
       (create-recog-var-restriction restriction-name source-pos-val nil nil t unordered? 
                                     num-subst nil) ; this is more restrictive (subst-opt? = nil)
       *var-restrictions-recogn*))))


;;; code for converting clause structures into prolog lists (maybe this belongs in count.lisp)

(defun convert-clause-struct-to-list (clause)
  (cons (cons (clause-head clause) (clause-parameters clause)) 
        (clause-body clause)))

(defun convert-clause-struct-to-prolog (clause)
  (let ((list (convert-clause-struct-to-list clause))
        (var-counter -1)
        (parameter-a-list nil))
    (dolist (literal list)
      (when (listp literal)
        (dolist (arg (cdr literal))
          (if (and (pcvar-p arg) (null (assoc arg parameter-a-list)))
            (push (cons arg (make-pcvar :id (incf var-counter))) parameter-a-list)))))
    (sublis parameter-a-list list)))



;;;  recognizing predicate restrictions

(defun recognize-pred-restriction (literal)
  (let* ((pred-struct (get-r-struct (car literal)))
         (pred-type
          (cond ((pred-p pred-struct) 'ext-pred)
                ((builtin-p pred-struct) 'comp)
                (t 'pred)))
         (new-var-types (remove-duplicates (new-var-types pred-struct)))
         (new-var-type-restr
          (cond ((and new-var-types (cdr new-var-types))
                 (list 'supports-new-var nil))
                (new-var-types 
                 (list 'supports-new-var (car new-var-types)))
                (t nil))))                
    (if new-var-type-restr
      (list pred-type new-var-type-restr)
      (list pred-type))))
    


(defun pred-supports-new-var (pstruct)
  (let ((mode (r-mode pstruct)))
    (some #'(lambda (m) (or (eql m :-) (eql m :?))) mode)))


;;; cliche recognition using instantiations of the unconstrained cliche

(defun test-cliche-recog-uc (&optional (trace '(:l :ci)))
  (let ((uc-instantiations nil))
    (mapc #'(lambda (test-files test-fn concept)
              (set-up-test-files test-files)
              (format t "~%~%testing UNCONSTRAINED CLICHE on ~a~%" concept) 
              (apply test-fn trace :available-cliches '(unconstrained)
                     '(:create-preds-from-cliches t))
              (format t "~%~% cliche variabilizations checked: ~a ~% total variabilizations checked ~a"
                      *cliche-variabilizations-checked* *variablizations-checked* )
              (if *cliches-to-be-named*
                (push (list test-files concept 
                            (mapcar #'(lambda (c) (copy-r (get c 'r-struct))) 
                                    *cliches-to-be-named*)
                            *learned-description*) 
                      uc-instantiations))
              )
          *cliche-test-files*
          *cliche-test-functions*
          *cliche-test-concepts*)
    (process-uci-cliches uc-instantiations)
    uc-instantiations))


(defun process-uci-cliches (uc-instantiations &optional (conj-length 2))
  (pprint (recognize-uci-cliches uc-instantiations conj-length)))


;;; do cliche recognition from tripples stored in uc-instantiations (i.e., uci).  The tripples
;;; are of the form <files, concept, uc-cliche instantiations>

(defun recognize-uci-cliches (uc-instantiations &optional (conj-length 2))
  (nreverse
  (mapcar 
   #'(lambda (uci) 
       (set-up-test-files (car uci))
       (list (second uci)
             (fourth uci)
             (mapcar #'(lambda (cl-rule)
                         (recognize-cliches-from-int-pred cl-rule conj-length))
                     (third uci))))
   uc-instantiations)))


;;; cliche recognition form learned concept descriptions

;;; test function for recognizing cliches from the operational definitions produced
;;; by the test files  (lcd - i.e., learned
;;; concept description).

(defun test-cliche-recog-lcd (&optional (trace '(:l :ci)) 
                                        (cliche-test-files *cliche-test-files*)
                                        (cliche-test-functions *cliche-test-functions*)
                                        (cliche-test-concepts *cliche-test-concepts*))

  (let ((lcd-instantiations nil))
    (mapc #'(lambda (test-files test-fn concept)
              (set-up-test-files test-files)
              (format t "~%~%testing cliches on ~a~%" concept) 
              (apply test-fn trace :available-cliches (list *cliche-names*))
              (format t "~%~% cliche variabilizations checked: ~a ~% total variabilizations checked ~a"
                      *cliche-variabilizations-checked* *variablizations-checked*)
              (fix-up-lcd)
              (push (recog-cliches-from-lcd) lcd-instantiations))
          cliche-test-files
          cliche-test-functions
          cliche-test-concepts)
    (nreverse lcd-instantiations)))


;;; right now just do it if info-gain of conjunction exceeds info-gain of first literal
;;; use find-max-literal or something to get best first literal gain later

(defun recog-cliches-from-lcd (&optional (lcd  *learned-description*) 
                                         (lcd-head *learned-description-head*)
                                         (conj-length 2))
  (fix-up-lcd lcd lcd-head)
  (list (r-name lcd-head) 
        lcd
        (mapcan #'(lambda (l) 
                    (recog-cliches-from-lcd-clause l (r-vars lcd-head)
                                                   (r-vars lcd-head) conj-length))
                lcd)))

;;; performs cliche recognition from the clause (or partial clause) of an operation definition 
;;; given the first literal of the clause (l), the variables of the head of the clause 
;;; (head-vars) the variables prior to the start of the clause (old-vars - this is necessary
;;; for processing partial clauses, otherwise its just head-vars), and the length of the 
;;; cliches to find (conj-length).

(defun recog-cliches-from-lcd-clause (l head-vars old-vars conj-length)
  (let ((recogn-cliches nil)
        (first-literal-gain nil)
        (conj-gain nil)
        (new-vars nil)
        (clause (convert-literal-conj-to-list l))
        (conj nil))
    (do ((literal l (literal-next literal))
         (clause-remainder clause (cdr clause-remainder))
         (i (- (length clause) (1- conj-length)) (1- i))
         (position 0 (1+ position)))
        ((zerop i) (nreverse recogn-cliches))
      (setq first-literal-gain (gain-gain (compute-info-gain-literal-conj literal 1)))
      (setq conj-gain (gain-gain (compute-info-gain-literal-conj literal conj-length)))
      (setq conj (subseq clause-remainder 0 conj-length))
      (if (and (> conj-gain 0) (> conj-gain first-literal-gain))
        (push (list conj
                    (recognize-cliche-from-conjunction conj head-vars 
                                                       old-vars position clause))
              recogn-cliches))
      (setq new-vars (compute-new-vars (literal-variablization literal) old-vars))
      (setq old-vars (if new-vars (append old-vars new-vars))))))


;;; useful misc. functions for cliche recognition and possibly other stuff

;;; loads given files from test directory

(defun set-up-test-files (test-files)
  (reset-preds)
  (mapc #'load-source-test-file test-files))

    ;;; fixes up lcd on if pos tuples are empty for the first literal

(defun fix-up-lcd (&optional (lcd  *learned-description*) 
                             (lcd-head *learned-description-head*))
  (if lcd
    (let ((pos (r-pos lcd-head))
          (neg (r-neg lcd-head))
          (vars (r-vars lcd-head)))
      (unless (literal-pos (car lcd))
        (fix-up-lcd-literal (car lcd) pos neg vars))
      (fix-up-lcd (cdr lcd) lcd-head))))

(defun fix-up-lcd-literal (literal pos neg vars)
  (when literal
    (let* ((pred (get-r-struct (literal-predicate-name literal)))
           (varzn (literal-variablization literal))
           (negated? (literal-negated? literal))
           (new-vars (compute-new-vars varzn vars))
           (new-pos (generalized-extend-tuples pred pos varzn negated? new-vars))
           (new-neg (generalized-extend-tuples pred neg varzn negated? new-vars)))
      (setf (literal-pos literal) pos)
      (setf (literal-neg literal) neg)
      (setf (literal-new-pos literal) new-pos)
      (setf (literal-new-neg literal) new-neg)
      (fix-up-lcd-literal (literal-next literal) new-pos new-neg 
                          (if new-vars (append vars new-vars) vars)))))

;;; accepts a literal and creates a conjunction in the form of a list by tracing the literal-next
;;; links of literal

(defun convert-literal-conj-to-list (literal)
  (do ((l literal (literal-next l))
       (list nil))
      ((null l) (reverse list))
    (push (cons (literal-predicate-name l) (literal-variablization l)) list)))


;;; accepts a literal and the length of a conjunction beginning with that literal and
;;; computes the info-gain of the conjunction.

(defun compute-info-gain-literal-conj (literal conj-length)
  (let ((pos (literal-pos literal))
        (neg (literal-neg literal))
        (last-literal literal))
    (dotimes (i (1- conj-length))
      (setq last-literal (literal-next last-literal)))
    (let ((new-pos (literal-new-pos last-literal))
          (new-neg (literal-new-neg last-literal))
          (current-state-value (current-metric (length pos) (length neg))))
      (GAIN-METRIC current-state-value 
                   (length new-pos)
                   (length (count-originals-extended pos new-pos))
                   (length new-neg)
                   (length (count-originals-extended neg new-neg))))))


