;;; -*- Mode:Common-Lisp; Package:USER; Base:10 -*-
;;; This file contains functions for encoding multi-value feature vectors into
;;; binary 

(defparameter *print-encoding* nil          "Prints the binary encoding of values if non-NIL")
(defparameter *one-bit-for-two-values*  t   "Encode two-valued features using one bit")

(defvar       *current-encoded-domains* nil "*domains* whose encoding is currently saved in *encoding-alists*")
(defvar       *encoding-alists* nil         "Binary encodings for feature values")
(defvar       *binary-feature-names* nil)

(defun convert-to-binary-example (example)
  ;;; Encode examples with n-valued features as bit strings using n bits/feature
  ;;; unless short-encode-flag is set in which case log(n) bits are used
  ;;; Return the encoded examples.

  ;; Replace values with their coresponding bit strings in each example
  (list (first example)
	(encode-instance (second example))))

(defun make-encoding ()
  (unless (eq *current-encoded-domains* *domains*)
    (setf *encoding-alists* (mapcar #'encode-domain *domains* *feature-names*))
    (setf *current-encoded-domains* *domains*)
    (setf *binary-feature-names* (make-binary-feature-names))
    (trace-print *print-encoding* "~%~%Encoding: ~A" *encoding-alists*)))

(defun encode-domain (domain feature)
  ;; Assign an n bit string for each value in domain with the one bit for that
  ;; value set to 1
  (cond ((eq domain 'linear)
	 (let ((range (get feature 'range)))
	   ;; Store min and span of linear features in encoding alist for scaling during encoding
	   (when (null range) (make-ranges (compute-ranges)) (setf range (get feature 'range)))
	   (list (first range) (- (second range) (first range)))))
	((and *one-bit-for-two-values* (= (length domain) 2))
	 ;; encode two-valued features using only 1 bit instead of two
	 (if (member (first domain) '(true yes))
	     (list (list (first domain) 1) (list (second domain) 0) (list *missing-value* 0.5))
	     (list (list (first domain) 0) (list (second domain) 1) (list *missing-value* 0.5))))
	(t (nconc
	     (mapcar #'(lambda (value)
		       (cons value (mapcar #'(lambda (v) (if (eq v value) 1 0))
					   domain)))
		   domain)
	     ;; If missing value give 1/N to every feature value
	     (list (cons *missing-value* (make-list (length domain) :initial-element (/-float 1 (length domain)))))))))


(defun encode-instance (instance)
  ;;; Given the encoding for each feature value, encode an instance into binary
  
  (mapcan #'(lambda (feature-value encoding-alist)
	      (if (listp (first encoding-alist))
		  (copy-list (rest (assoc feature-value
					  encoding-alist)))
		  (if (eq feature-value *missing-value*)
		      (list 0.5)                        ; Missing linear features get 0.5
		      ;;; Otherwise scale value between 0 and 1 using min and span stored in encoding alist
		      (list (/-float (- feature-value (first encoding-alist)) (second encoding-alist))))))
	  instance *encoding-alists*))


(defun make-binary-feature-names ()
  (mapcan #'(lambda (feature)
	      (let ((domain (feature-domain feature)))
		(cond ((eq domain 'linear) (list feature))
		      ((and *one-bit-for-two-values* (= (length domain) 2))
		       (if (member (first domain) '(true yes))
			   (list (append-symbols feature '- (first domain)))
			   (list (append-symbols feature '- (second domain)))))
		      (t (mapcar #'(lambda (value) (append-symbols feature '- value))
				 domain)))))
	  *feature-names*))

(defun binary-feature-name (num)
  (if *binary-feature-names*
      (elt *binary-feature-names* num)
      num))