;;; -*- Mode:Common-Lisp; Package:USER; Base:10 -*-

(in-package #+:CLTL2 "CL-USER" #-:CLTL2 "USER")

;;;; Standard data file:  A standard example data file (possibly with a theory) sets a set of standard
;;;; global variables. The UNIVERSAL-TESTER only crucially relies on *RAW-EXAMPLES* but the rest are
;;;; standard for many systems. UNIVERSAL-TESTER makes use of *GOAL* and *THEORY* in some situations.
;;;; *** Learning systems should not alter the values of these variables or destructively modify them ***

;;;; *CATEGORIES*:     A list of all categories (classes) present in the data.

;;;; *FEATURE-NAMES*:  An ordered list of names for the features used to describe examples.

;;;; *DOMAINS*:        An ordered list of domains for each feature where a domain is either a list
;;;;                   of possible values or the symbol LINEAR to indicate a real valued feature.

;;;; *RAW-EXAMPLES*:   A list of examples where the first element of an example is its class.
;;;;                   The two standard formats for examples assumed by many systems are:
;;;;                        Ordered example: (<class> <ordered-feature-value-sequence>)  
;;;;                                    e.g. (+ (big red square))
;;;;                        Alist example:   (<class> (<feature> <value>) ... )  
;;;;                                    e.g. (+ (size big) (color red) (shape square))
;;;;                   ID3-ALL works with both where a sequence of feature values can be a list or an array.
;;;;                   UNIVERSAL-TESTER only assumes that the first element is the class and as long as
;;;;                   the learners are happy with the example format it is too.
;;;; *THEORY*:         For theory revision problems a list of rules suitable for DEDUCE.
;;;; *GOAL*:           A goal to be proven for positive examples using deduce, e.g. (CUP)

;;; Global variables used in data files
(defvar *raw-examples* nil    "List of examples")
(defvar *feature-names* nil   "A list of names for each feature")
(defvar *domains* nil         "A list defining the domain of each feature in the vector")
(defvar *categories* '(+ -)   "A list of all categories in the data")
(defvar *theory* nil          "Initial domain theory")
(defvar *goal*   nil          "Top-level goal for domain theory")
(defvar *missing-value* '?    "Feature value representing missing")
(defvar *noise*         nil   "Perform extra processing to handle noise and/or prevent over-fitting")

(defmacro trace-print (test-var &rest format-form)
  ;;; When test-var (usually a *-trace variable) is set then use formated print
  `(if ,test-var
       (format t ,@format-form)))

;;;;-------------------------------------------------------------------------------------------------------------
;;;;  Access functions for various info about data
;;;;-------------------------------------------------------------------------------------------------------------

(defun feature-name (feature-num)
  "Returns the name of a feature given its number"
  (or (elt *feature-names* feature-num) feature-num))

(defun feature-domain (feature)
  "Returns the domain of a feature given its number"
  (if (numberp feature)
      (elt *domains* feature)
      (elt *domains* (position feature *feature-names*))))

(defun binary-feature-p (feature)
  (member (feature-domain feature) '((true false)(false true)) :test #'equal))

(defun linear-feature-p (feature)
  "Returns T is given feature is linear"
  (eq (feature-domain feature) 'linear))

;;;;-------------------------------------------------------------------------------------------------------------
;;;; Functions for checking data for consistency
;;;;-------------------------------------------------------------------------------------------------------------

(defun check-data (&optional (examples *raw-examples*))
  (unless (eq (length *domains*)(length *feature-names*))
    (format t "~%~%Length of *domains* not same as *feature-names*"))
  (dolist (feature-name *feature-names*)
    (unless (symbolp feature-name)
      (format t "~%~%Illegal entry in *feature-names*: ~A" feature-name)))
  (dolist (domain *domains*)
    (unless (or (eq domain 'linear) (and domain (every #'atom domain)))
      (format t "~%~%Illegal entry in *domains**: ~A" domain)))
  (cond ((alist-example-p (first examples))
	 (format t "~%~% Looks like a-list examples")
	 (mapc #'check-alist-example examples))
	(t (format t "~%~%Looks like ordered examples")
	   (mapc #'check-ordered-example examples)))
  nil)

(defun check-alist-example (example)
  (unless (member (first example) *categories*)
    (format t "~%~%Unknown class for: ~A" example))
  (dolist (pair (rest example))
    (unless
      (and (listp pair) (member (first pair) *feature-names*) (null (rest (rest pair)))
	   (or (and (rest pair) (or (and (numberp (second pair))
					 (linear-feature-p (first pair)))
				    (member (second pair) (feature-domain (first pair)))
				    (eq (second pair) *missing-value*)))
	       (binary-feature-p (first pair))))
      (format t "~%~%Illegal feature: ~A in ~%~A" pair example))))

(defun check-ordered-example (example)
  (unless (member (first example) *categories*)
    (format t "~%~%Unknown class for: ~A" example))
  (unless (and (or (consp (second example)) (arrayp (second example)))
	       (= (length (second example)) (length *domains*)))
    (format t "~%~%Illegal example: ~A" example))
  (dotimes (i (length *domains*))
    (let ((value (elt (second example) i))
	  (domain (elt *domains* i)))
      (unless
	(or (eq value *missing-value*)
	    (and (numberp value)
		 (eq domain 'linear))
	    (and (listp domain) (member value domain)))
	(format t "~%~%Illegal feature: ~A = ~A in ~%~A" (feature-name i) value example)))))

;;;;-------------------------------------------------------------------------------------------------------------
;;;; Functions for checking theory
;;;;-------------------------------------------------------------------------------------------------------------

(defun check-theory-antecedents (&optional (theory *theory*))
  (setf *augmented-feature-names*
	(append *feature-names* '(< <= = >= >)))
  (let* (bad-rules-and-antecedents bad-consequents good-consequents
	 (answer
	   (dolist (rule theory (list (reverse bad-rules-and-antecedents) (reverse bad-consequents)))
	     (dolist (antecedent (antecedents rule))
	       (when (eq (first antecedent) 'not) (setf antecedent (second antecedent)))
	       (let ((predicate (first antecedent)))
		 (if (member predicate *augmented-feature-names*)
		     (when (and (= (length antecedent) 2)
				(not (eq (domain predicate) 'linear)))
		       (let ((value (second antecedent)))
			 (unless (or (pcvar-p value)
				     (member value (domain predicate)))
			   (format t "~%The value for predicate ~A 
is not within the domain of its attribute in rule ~%~1T~A" predicate rule)
			   (setf bad-rules-and-antecedents (cons (list rule antecedent)
								 bad-rules-and-antecedents)))))
		     (unless (find antecedent (remove rule theory) :key
				   'consequent
				   :test 'equal)
		       (format t "~%Antecedent ~A 
is not implied by any rule and is not a feature in rule ~%~A."
			       antecedent rule)
		       (setf bad-rules-and-antecedents (cons (list rule antecedent)
							     bad-rules-and-antecedents))))))
	     (let ((consequent (consequent rule)))
	     (or (member consequent good-consequents :test 'equal-or-av)
		 (member consequent bad-consequents :test 'equal-or-av)
		 (if (find-consequent-in-theory consequent theory)
		     (setf good-consequents (cons consequent good-consequents))
		     (setf bad-consequents (cons consequent bad-consequents))))))))
    (if  (and (null (first answer))
		 (null (second answer)))
      (format t "~%Theory is O.K.")
      answer)))

(defun find-consequent-in-theory (consequent theory)
  (if (or (member (first consequent) *categories*)
	  (dolist (rule theory)
	    (when (member consequent (antecedents rule) :test 'equal-or-av)
	      (return t))))
      consequent
      (let ((rules (find-rules consequent theory)))
      (format t "~%~%The rule~P ~{~%~1T~A ~} ~%~[is ~;are ~] not used by the theory." 
	      (length rules)
	      rules
	      (if (> (length rules) 1)
		  1 0)))))



(defun equal-or-av (list1 list2)
  (or (equal list1 list2)
      (and (consp list1)
	   (consp list2)
	   (> (length list1) 1)
	   (> (length list2) 1)
	   (set-equal list1 list2 :test
		      #'alphabetic-variant))))



;;;;-------------------------------------------------------------------------------------------------------------
;;;; Functions for computing information about data
;;;;-------------------------------------------------------------------------------------------------------------

(defun compute-domains (&optional (features *feature-names*) (examples *raw-examples*))
  "Compute feature domains for alist examples"
  (mapcar #'(lambda (feature) (compute-domain feature examples)) features))

(defun compute-domain (feature &optional (examples *raw-examples*))
  (let (domain)
    (dolist (ex examples domain)
      (let ((pair (assoc feature (rest ex))))
	(if pair
	    (pushnew (second pair) domain))))))

(defun describe-theory (&optional (theory *theory*))
  (let* ((groups (group-rules theory))
	 (num-rules (length theory))
	 (num-conses (length groups))
	 (num-antes  (count-antecedents theory)))
    (format t "~%~%Number of rules: ~A" num-rules)
    (format t "~%Number of consequents: ~A" num-conses)
    (format t "~%Number of symbols: ~D" (+ num-antes num-rules))
    (format t "~%Average number of disjuncts: ~,2F" (/ num-rules num-conses))
    (format t "~%Average number of antecedents: ~,2F" (/ num-antes num-rules))))
    
(defun count-antecedents (rules)
  (let ((sum 0))
    (dolist (rule rules sum)
      (incf sum (length (brule-antecedents rule))))))

(defun group-rules (rules)
  (let (alist)
    (dolist (rule rules alist)
      (let ((set (assoc (brule-consequent rule) alist :test #'equal)))
	(if set
	    (nconc set (list rule))
	    (push (list (brule-consequent rule) rule) alist))))))

(defun describe-data (&optional count-missing (examples *raw-examples*))
  (let* ((groups (group-examples examples))
	 (num-examples (length examples))
	 (num-categories (length groups))
	 (unrep-cats (set-difference *categories* (mapcar #'first groups)))
	 (num-feature-values (reduce #'+ (mapcar #'(lambda (d) (if (atom d)
								   0
								   (length d)))
						  *domains*)))
	 (num-features (length *feature-names*))
	 (num-linear-features (count-if #'(lambda (d)
					    (eq d 'linear))
					*domains*))
	 (num-binary-features (count-if #'(lambda (d)
					    (or (equal d '(true false))
						(equal d '(false true))))
					*domains*))
	 (num-nominal-features (- num-features num-linear-features num-binary-features)))
    (format t "~%Number of examples: ~A" num-examples)
    (format t "~%Number of features: ~A" num-features)
    (format t "~%Number of linear features: ~A (~,2F%)" num-linear-features (* 100 (/ num-linear-features num-features)))
    (format t "~%Number of binary features: ~A (~,2F%)" num-binary-features (* 100 (/ num-binary-features num-features)))
    (format t "~%Number of nominal features: ~A (~,2F%)" num-nominal-features (* 100 (/ num-nominal-features num-features)))
    (format t "~%Number of categories: ~A (~,2F% random guess)" num-categories (* 100 (/ 1 num-categories)))
    (format t "~%Unrepresented categories: ~A" unrep-cats)
    (format t "~%Number of examples per category:")
    (dolist (group groups)
      (let ((n (length (rest group))))
	(format t "~%  ~A: ~A (~,2F%)" (first group) n (* 100 (/ n num-examples)))))
    (format t "~%Average number of examples per category: ~,2F" (/ num-examples num-categories))
    (unless (zerop (- num-features num-linear-features))
      (format t "~%Average number of feature values: ~,2F"
	      (/ num-feature-values (- num-features num-linear-features))))
    (if count-missing (count-missing examples))))
  
(defun group-examples (examples)
  (let (alist)
    (dolist (example examples alist)
      (let ((set (assoc (first example) alist)))
	(if set
	    (nconc set (list example))
	    (push (list (first example) example) alist))))))

(defun print-theory (&optional (theory *theory*))
  (dolist (rule theory)
    (format t "~%~(~A~) <-~{ ~(~A~)~}" (brule-consequent rule) (brule-antecedents rule)))
  (format t "~%~%Observable features:~{ ~(~A~)~}" *feature-names*)
  (format t "~%~%Categories: ~{ ~(~A~)~}" *categories*))

(defun count-missing (&optional (examples *raw-examples*))
  "Give info about missing values"
  (setf examples (make-ordered-examples examples))
  (let ((value-count 0) (example-count 0) (num-examples (length examples))
	(num-features (length (second (first examples)))))
    (dolist (example examples)
      (let ((count (count *missing-value* (second example))))
	(unless (zerop count)
	  (incf value-count count)
	  (incf example-count))))
    (format t "~%There are ~A missing values (~,2F%)" value-count (* 100 (/ value-count (* num-examples num-features))))
    (format t "~%There are ~A examples with missing values (~,2F%)" example-count (* 100 (/ example-count num-examples)))))

(defun delete-examples-with-missing (&optional (examples *raw-examples*))
  "Delete examples with missing values for ordered examples"
  (delete-if #'(lambda (ex) (member *missing-value* (second ex))) (make-ordered-examples examples)))
  
;;;;-------------------------------------------------------------------------------------------------------------
;;;; Functions for Testing Systems and Theory
;;;;-------------------------------------------------------------------------------------------------------------

(defun test-system (system training-result test-examples &optional (print-results t))
  (let ((test-function (append-symbols 'test- system))
	(num-examples (length test-examples))
	(num-correct 0)
	answer)
    (dolist (example test-examples)
      (setf answer (funcall test-function example training-result))
      (when (eq answer (first example)) (incf num-correct))
      (trace-print print-results "~%~AReal category: ~A; Classified as: ~A"
		 (if (eq answer (first example)) "  " "**")  (first example) answer))
    (format t "~%~%~A classified ~,2F% of the ~D test cases correctly."
	    test-function (* 100 (/ num-correct num-examples)) num-examples)))

(defvar *number-nils* 0)

(defun test-theory (&optional (theory *theory*) (examples *raw-examples*) (print-results t))
  "Test the accuracy of a theory on a set of examples"
  (setf *number-nils* 0)
  (let* ((num-exs (length examples))
	 (accuracy (if examples
		      (let ((number-right 0))
			(dolist (example examples (* 100.0 (/ number-right num-exs)) )
			  (setf number-right (+ number-right (test-theory-example example theory print-results)))))
		      100)))
  (format t "~%~%Theory classified ~,2F% of the ~D test cases correctly." accuracy num-exs)
  (format t "~%Number not classified in any category: ~D (~,2F%)" *number-nils* (* 100 (/ *number-nils* num-exs)))))

(defun test-theory-example (example &optional (theory *theory*) (print-results t))
  "Return probability that example is classified correctly by theory"
  (setf example (make-alist-example example))
  (let ((provable-categories (provable-example-categories example theory *categories* print-results)))
    (cond ((null provable-categories)
	   (incf *number-nils*)
	   (if (member 'negative *categories*)
	       (if (eq (first example) 'negative)
		   1 0)
	       (/ 1 (length *categories*))))
	  ((member (first example) provable-categories)
	   (/ 1 (length provable-categories)))
	  (t 0))))

(defun provable-example-categories (example &optional (theory *theory*) (categories *categories*) (print-results t))
  "Reutrn the list of categories example is provable in"
  (if (or (equal categories '(+ -)) (equal categories '(- +)))
      (if (prove theory example) '(+) '(-))
      (let (proved-categories)
	(dolist (category categories)
	  (if (prove theory example (list category))
	      (push category proved-categories)))
	(trace-print print-results
		     "~%~AReal category: ~A; Proved categories: ~A"
		     (if (or (and (eq (first example) (first proved-categories))
				  (null (rest proved-categories)))
			     (and (eq (first example) 'negative) (null proved-categories)))
			 "  " "**")
		     (first example) proved-categories)
	proved-categories)))

;;;;-------------------------------------------------------------------------------------------------------------
;;;; Functions for converting between different data formats
;;;;-------------------------------------------------------------------------------------------------------------

(defun alist-example-p (example)
  "Return T if example is in alist form"
  (or (rest (rest example))
      (null (rest example))
      (let ((second-elt (second example)))
	(and (consp second-elt)
	     (null (rest (rest second-elt)))
	     (member (first second-elt) *feature-names*)))))

(defun make-ordered-examples (examples &optional (array-flag t))
  (if (alist-example-p (first examples))
      (mapcar #'(lambda (ex) (convert-to-ordered-example ex array-flag)) examples)
      examples))

(defun make-ordered-example (example)
  (if (alist-example-p example)
      (convert-to-ordered-example example)
      example))

(defun convert-to-ordered-example (example &optional (array-flag t))
  "Convert an example from alist form to ordered feature form (an array
   if flag set)"
  (let ((counter 0)
	output-example
	(array (if array-flag (make-array (length *domains*)))))
    (dolist (feature-name *feature-names* (list (first example) (if array-flag
								    array
								    (reverse output-example))))
      (let* ((item (find feature-name (cdr example) :key #'car))
	     (value (if (and item (second item))
			 (second item)
			 (let ((domain (elt *domains* counter)))
			   (if (and (consp domain)(member 'true domain))
			       (if item 'true 'false)
			       (if item (error "No value for ~A" item) *missing-value*))))))
	(if array-flag
	    (setf (aref array counter) value)
	    (push value output-example))
	(incf counter)))))

(defun make-alist-examples (examples)
  (if (not (alist-example-p (first examples)))
      (mapcar #'convert-to-alist-example examples)
      examples))

(defun make-alist-example (example)
  (if (not (alist-example-p example))
      (convert-to-alist-example example)
      example))

(defun convert-to-alist-example (example)
  (let (instance)
    (dotimes (i (length *feature-names*) (cons (first example) (reverse instance)))
      (let ((value (elt (second example) i)))
	(unless (or (eq value *missing-value*) (eq value 'false))
	  (push (if (eq value 'true)
		    (list (elt *feature-names* i))
		    (list (elt *feature-names* i) value))
		instance))))))

(defun convert-category-examples ()
  (mapcan #'(lambda (cat)
	      (mapcar #'(lambda (instance) (list cat instance))
		      (eval cat)))
	  *categories*))

(defun make-deduce-rules (theory)
  (mapcar #'(lambda (rule)
	      (cons '<- (cons (first rule) (rest (rest rule)))))
	  theory))

;;;;-------------------------------------------------------------------------------------------------------------
;;;; Accessory functions for data
;;;;-------------------------------------------------------------------------------------------------------------

(defun make-domains (levels-list)
  ;;; If features values are simply integers 0 to n then the number of values of a
  ;;; feature is sufficient for determining its domain.  This function creates a
  ;;; list suitable for *domains* given a list of the number of values for each feature.
  ;;; See the file SOYBEAN-RDATA for a sample use.

  (mapcar #'(lambda (levels) (let ((domain nil)) 
			       (dotimes (i levels domain)
				 (setf domain (nconc domain (list i))))))
	  levels-list))

(defun make-examples (pos-instances neg-instances)
  ;;; Converts lists of positive and negative instances into a list of examples
  ;;; suitable for ID3.

  (append (mapcar #'(lambda (instance) (list '+ instance)) pos-instances)
	  (mapcar #'(lambda (instance) (list '- instance)) neg-instances)))


;;;;-------------------------------------------------------------------------------------------------------------
;;;;  Miscellaneous functions
;;;;-------------------------------------------------------------------------------------------------------------


(defun pick-one (list)
  "Pick an item randomly from the list"
  (nth (random (length list)) list))

(defun read-file (file-name)
  (with-open-file (input file-name :direction :input)
    (read  input nil nil)))

(defun write-data-file (filename &optional
			(var-list '(*feature-names* *domains* *categories* *theory* *raw-examples*))
			(pretty-print t))
  (let ((*print-pretty* pretty-print))
    (with-open-file (file filename :direction :output :if-exists :new-version)
    (format file ";;; -*- Mode:Common-Lisp; Package:USER -*-~%")
      (dolist (var var-list)
	(if (eq var '*raw-examples*)
	    (progn (format file "~%(setf *raw-examples* '(")
		   (dolist (ex (eval var))
		     (format file "~%  ~A" ex))
		   (format file "~%))"))
	    (format file "~%(setf ~A~%  ~A)~%" var (list 'quote (eval var))))))))


(defun read-line-list (stream)
  (read-from-string (concatenate 'string "(" (read-line stream nil) ")")))

(defun square (x)
  (* x x))

(defun /-float (a b)
  "Division forcing a floating point output"
  (coerce (/ a b) 'single-float))

(defun append-symbols (&rest symbols)
  (intern (format nil "~{~A~}" symbols)))

(defun seconds-since (time)
   ;;; Return seconds elapsed since given time (initially set by get-internal-run-time)
  (/ (- (get-internal-run-time) time)
     internal-time-units-per-second))
