;;; This is an implementation of the basic ID3 algorithm for learning from examples, January, 1988.

;;;; Copyright (c) 1988 by Raymond Joseph Mooney. This program may be freely copied, used, or
;;;; modified provided that this copyright notice is included in each copy of this code
;;;; and parts thereof.

;;;; This implementation of ID3 produces decision trees discriminating positive and negative
;;;; instances, which are represented by simple nominal feature vectors (ordered lists).
;;;; WEATHER-DATA is a sample datafile for the weather example used in Quinlan's ML journal article

(defparameter *trace-id3* nil "When non-NIL, TRAIN-ID3 and BUILD-DECISION-TREE print a trace of the tree construction (info gains, split choices, leaf labels).")

;;; A decision tree is either a symbol representing a leaf (+ or -) or a structure
;;; where decision-tree-feature is the number (starting from 1) of the feature
;;; being tested and decision-tree-subtrees is an assoc list of the form
;;; ((value1 subtree1)(value2 subtree2)...) representing the branches and subtrees

(defstruct (decision-tree (:print-function print-decision-tree))
  feature    ; number (starting from 1) of the feature tested at this node
  subtrees)  ; assoc list ((value1 subtree1)(value2 subtree2)...) of branches

(defun train-id3 (examples)
  "Top-level ID3 trainer.  EXAMPLES is a list whose elements are (class
   instance) pairs, class being + or -.  Returns a decision tree that
   classifies instances as + or -.  With no examples at all, falls back to an
   arbitrary category from *CATEGORIES*."
  (if (null examples)
      (pick-one *categories*)
      ;; Start with every feature index (1..number-of-features) available
      ;; for splitting; emit a final newline to close the trace output.
      (prog1 (build-decision-tree
               examples
               (loop for i from 1 to (length *domains*) collect i))
        (trace-print *trace-id3* "~%"))))


(defun build-decision-tree (examples features &optional most-common)
;;; This function produces a decision tree for the given set of examples
;;; by choosing one of the given features (features are indicated by a
;;; number giving its position in the vector) as the root of the tree and
;;; recursively making trees for each of the resulting categories.
;;; most-common passes the most common class of a parent to its children
  
  (let ((p 0)(n 0))
    ;; Count positive (p) and negative (n) examples; each example is a
    ;; (class instance) pair whose class is the symbol + or -.
    (dolist (example examples) (cond ((eq (first example) '+)
				      (incf p))
				     ((eq (first example) '-)
				      (incf n))))
    (cond ((null examples) 
	   ;; if there are no examples, label leaf with most common class from the parent node
	   (trace-print *trace-id3* "~%No examples, use most common class of parent: ~A" most-common)
	   most-common)
	  ((zerop p)
	   ;; if there are no positive examples then they must all be
	   ;; negative so make a leaf indicating a negative outcome.
	   (trace-print *trace-id3* "~%All examples -")
	   '-)
	  ((zerop n)
	   ;; if there are no negative examples then they must all be
	   ;; positive so make
	   ;; a leaf indicating a positive outcome.
	   (trace-print *trace-id3* "~%All examples +")
	   '+)
	  ((null features)
	   ;; if there are no features left to discriminate on and all
	   ;; examples are not in the same class, then example set must
	   ;; have had same instance both positive and negative
	   (trace-print *trace-id3* "~%Inconsistent data, using most common class: ~A" (if (> p n) '+ '-))
	   (if (> p n) '+ '-))
	  (t (let ((I (info p n)) (split-feature nil) (E 0) (min-E 1e10))
	       ;; Otherwise find the feature which maximizes information
	       ;; gain (minimizes E) and make it the root of the decision
	       ;; tree (i.e. make it the "split feature").  Since I is the
	       ;; same for every candidate, minimizing E is equivalent to
	       ;; maximizing the gain I - E.
	       (dolist (feature features)
		 (setf E (expected-info feature examples))
		 (trace-print
		   *trace-id3*
		   "~%Info gain for feature ~A = ~5,3F" feature (- I E))
		 (if (< E min-E)
		     (progn (setf min-E E)(setf split-feature feature))))
	       (trace-print *trace-id3*
			    "~%~%Splitting on feature ~A" split-feature)
	       ;; separate instances based on their value for this feature
	       ;; and process each subset of examples recursively
	       ;; eliminating the splitting feature from the set of features
	       ;; available for use in discriminating between examples
	       (make-decision-tree :feature split-feature
		    :subtrees
		     (mapcar #'(lambda (value)
				 (trace-print *trace-id3*
					      "~%~%Considering value ~A of feature ~A"
					      value split-feature)
				 (list value
				       (build-decision-tree
					(remove-if-not
					  #'(lambda (ex)
					      (eq (nth (1- split-feature)
						       (second ex))
						  value))
					  examples)
					(remove split-feature features)
					;; the current majority class becomes
					;; MOST-COMMON for empty child subsets
					(if (> p n)
					    '+
					    '-))))
			     ;; iterate over every legal value of the split
			     ;; feature as listed in *DOMAINS*
			     (nth (1- split-feature) *domains*))))))))

(defun expected-info (feature examples)
  "Compute the expected amount of information needed for the subtrees created
   by splitting EXAMPLES on FEATURE: a weighted sum, over each value in the
   feature's domain, of the information needed for that value's subset."
  (let ((total (length examples))
        (result 0))
    (dolist (value (nth (1- feature) *domains*) result)
      ;; Count positives and negatives among examples with this feature value.
      (let ((pos 0) (neg 0))
        (dolist (example examples)
          (when (equal (nth (1- feature) (second example)) value)
            (case (first example)
              (+ (incf pos))
              (- (incf neg)))))
        ;; Weight this subset's entropy by its fraction of the examples.
        (incf result (* (/ (+ pos neg) total)
                        (info pos neg)))))))


(defun info (p n)
  "Compute the amount of information (binary entropy, in bits) needed to
   distinguish the two classes given P positive and N negative instances.
   A class with zero instances contributes zero by convention."
  (let ((s (+ p n)))
    (flet ((term (k)
             ;; -(k/s) * log2(k/s), with 0 when k = 0 (avoids log of 0).
             (if (zerop k)
                 0
                 (let ((q (/ k s)))
                   (- (* q (log q 2)))))))
      (+ (term p) (term n)))))


;;;; ==========================================================================================
;;;; Testing and Printing functions
;;;; ==========================================================================================

(defun test-id3 (example decision-tree)
  "Determine the class of EXAMPLE by traversing DECISION-TREE, following at
   each node the branch matching the example's value for the tested feature,
   until a leaf symbol (+ or -) is reached."
  (do ((tree decision-tree
             (second (assoc (nth (1- (decision-tree-feature tree))
                                 (second example))
                            (decision-tree-subtrees tree)))))
      ((symbolp tree) tree)))


(defun print-decision-tree (tree stream depth &optional (indent 0))
  ;;; Print decision tree in a nice indented form.
  ;;; DEPTH is required by the defstruct :print-function calling convention
  ;;; (object stream depth); it is passed through recursively but not used.
  ;; At the top-level call, replace feature numbers with names first.
  (if (= indent 0) (setf tree (format-decision-tree tree)))
  (cond ((atom tree)
	 ;; A symbol (+ or -) is a leaf: print the class at this indent level.
	 (format stream "~%~vTClass is: ~A" indent tree))
	(t (format stream "~%~vTFeature: ~A" indent (first tree))
	   ;; Print each (value subtree) branch, indenting children 5 columns.
	   (dolist (value-form (rest tree))
	     (format stream "~%~vT  ~A" indent (first value-form))
	     (print-decision-tree (second value-form) stream depth (+ indent 5))))))


(defun format-decision-tree (decision-tree)
  "Convert DECISION-TREE structures into a nested list form with feature
   numbers replaced by their names from *FEATURE-NAMES*, for pretty printing.
   Leaves (symbols) are returned unchanged."
  (if (symbolp decision-tree)
      decision-tree
      (let ((name (nth (1- (decision-tree-feature decision-tree))
                       *feature-names*)))
        (cons name
              (loop for (value subtree) in (decision-tree-subtrees decision-tree)
                    collect (list value (format-decision-tree subtree)))))))


;;;; ==========================================================================================
;;;; The following functions are for multiple category data
;;;; ==========================================================================================


(defun train-multi-id3 (examples)
  "ID3 for multiple categories. Returns a list of (category example-count tree)'s
   for each category where example-count is the number of examples in the
   category and tree is the ID3 tree constructed for the given category examples as +
   and all others as -"

  ;; Partition EXAMPLES by category, caching each category's instances on the
  ;; category symbol's plist under TRAINING-EXAMPLES (clearing stale data first).
  (dolist (category *categories*)
    (setf (get category 'training-examples) nil))
  (dolist (example examples)
    (push (rest example) (get (first example) 'training-examples)))
  (mapcar #'(lambda (category)
	      (format t "~%~%Category: ~A" category)
	      ;; Build a one-vs-rest training set: this category's examples
	      ;; labeled +, every other category's examples labeled -.
	      (let* ((training-examples
		       (append (label-examples category '+)
			       (mapcan #'(lambda (other-category)
					   (label-examples other-category '-))
				       (remove category *categories*))))
		     ;; Dynamically rebind *CATEGORIES* so the TRAIN-ID3 call
		     ;; below sees a two-class (+/-) problem for this tree.
		     (*categories* '(+ -))
		     (decision-tree (train-id3 training-examples)))
		(format t "~%~A" decision-tree)
		(list category (length (get category 'training-examples))
		      decision-tree)))
	  *categories*))


(defun label-examples (category label)
  "Relabel the cached training-examples of CATEGORY (stored on the symbol's
   plist) by consing the given class LABEL onto each instance."
  (loop for instance in (get category 'training-examples)
        collect (cons label instance)))


(defun test-multi-id3 (example tree-alist)
  "Classify EXAMPLE using TREE-ALIST, the ((category count tree) ...) result
   of TRAIN-MULTI-ID3.  Collects every category whose tree classifies the
   example as +, then picks the one with the most training examples in order
   to assign a single class.  If no tree matches, picks the category with the
   most training examples overall."
  (let ((positives (loop for (category count tree) in tree-alist
                         when (eq (test-id3 example tree) '+)
                           collect (list category count))))
    (if positives                        ;If there are matching categories
        (maximum-label positives)        ;Then pick one with most examples
        (maximum-label tree-alist))))    ;Else pick most common category


(defun maximum-label (count-alist)
  "Return the label in COUNT-ALIST, of the form ((label count) ...), whose
   count is greatest.  Ties keep the earliest entry; returns NIL when the
   list is empty or no count exceeds zero."
  (loop with best-label = nil
        with best-count = 0
        for (label count) in count-alist
        when (> count best-count)
          do (setf best-label label
                   best-count count)
        finally (return best-label)))

