;;;; A simple version of a k nearest neighbor algorithm.

;;;; Copyright (c) 1991 by Raymond Joseph Mooney. This program may be freely 
;;;; copied, used, or modified provided that this copyright notice is included 
;;;; in each copy of this code and parts thereof.

(sys:compile-load-if "wimberly:mooney.data;data-utilities")

(setf (get 'knn 'parameters) '(*neighbors*))
(setf (get 'knn 'expect-training-error) t)

(defparameter *neighbors* 1)
(defparameter *trace-knn* nil)

(defvar *current-knn-domains* nil)

(defun train-knn (examples)
  (compute-linear-feature-spans)
  (make-ordered-examples examples))

(defun test-knn (example training-examples)
  (setf example (make-ordered-example example))
  (if (null training-examples)
      (pick-one *categories*)
      (let (dist k-closest temp)
	(dotimes (i *neighbors*) (push (cons nil most-positive-fixnum) k-closest))
	(dolist (training-example training-examples)
	  (setf dist (example-distance training-example example))
	  (when (< dist (rest (first k-closest)))
	    (setf temp (first k-closest))
	    (setf (first temp) (first training-example))
	    (setf (rest temp) dist)
	    (do ((k-rest k-closest (rest k-rest)))
		((or (null (rest k-rest))
		     (>= dist (rest (second k-rest))))
		 (setf (first k-rest) temp))
	      (setf (first k-rest) (second k-rest)))))
	(majority-class k-closest))))

(defun majority-class (k-closest)
  (let (counts count (max-count 0) max-category)
    (dolist (pair (nreverse k-closest))
      (setf count (assoc (first pair) counts))
      (if count
	  (incf (rest count))
	  (push (cons (first pair) 1) counts)))
    (dolist (pair counts max-category)
      (when (>= (rest pair) max-count)
	(setf max-count (rest pair))
	(setf max-category (first pair))))))

(defun example-distance (ex1 ex2)
  (let ((dist 0))
    (map nil #'(lambda (feature value1 value2)
		 (incf dist (feature-distance feature value1 value2)))
	 *feature-names* (second ex1) (second ex2))
    dist))

(defun feature-distance (feature value1 value2)
  (let ((span (get feature 'span)))
    (cond ((or (eq value1 *missing-value*) (eq value2 *missing-value*))
	   0.5)
	  (span (/-float (abs (- value1 value2)) span))
	  (t (if (eq value1 value2) 0 1)))))

(defun compute-linear-feature-spans ()
  (unless (eq *current-knn-domains* *domains*)
    (dolist (feature *feature-names*)
      (if (linear-feature-p feature)
	  (progn (unless (get feature 'range) (make-ranges (compute-ranges (list feature))))
		 (setf (get feature 'span) (- (second (get feature 'range))
					      (first (get feature 'range)))))
	  (remprop feature 'span)))
    (setf *current-knn-domains* *domains*)))


;;;; ==========================================================================================
;;;; Variant systems
;;;; ==========================================================================================

(make-variant nn  knn ((*neighbors* 1)))
(make-variant 3nn knn ((*neighbors* 3)))
(make-variant 5nn knn ((*neighbors* 5)))
