;;;; Copyright (c) 1992 by the University of California, Irvine. 
;;;; This program may be freely copied, used, or modified provided that this
;;;; copyright notice is included in each copy of this code.  This program
;;;; may not be sold or incorporated into another product to be sold withou
;;;; written permission from the Regents of the University of California.
;;;; This program was written by Cliff Brunk


;;;  UCI MACHINE LEARNING REPOSITORY TO FOCL TRANSLATOR
;;;
;;;  1. A label for the examples 
;;;      eg.    REQUIRES_SURGERY
;;;
;;;  2. A label for the instances
;;;      eg.    HORSE
;;;
;;;  3. A list of attribute lables
;;;      eg.    (SURGERY (1 SURGERY) (2 MEDICAL)) (AGE (1 ADULT) (2 YOUNG)) HOSPITAL_NUMBER RECTAL_TEMPERATURE PULSE RESPIRATORY_RATE ...
;;;          if the lable is a list then the first element is the lable and the rest of the list is a substitution list
;;;          in this case value 1 appearing in the first position will be replaced by surgery.
;;;
;;;  4. A list of attribute types
;;;      eg.    :SURGERY :AGE :HOSPITAL_NUMBER :RECTAL_TEMPERATURE :PULSE :RESPIRATORY_RATE ...
;;;
;;;  5. The label of the attribute containing the example class
;;;      eg.    SURGICAL_LESION
;;;
;;;  6. A list of attribute values indicating the instance is a member of the positive class
;;;      eg.    1
;;;
;;;  7. A list of missing value indicators
;;;      eg.    ?
;;;
;;;  8. The data appears on subsequent lines each attribute delimited by a single specified character and
;;;     each instance appearing on a sperate line.
;;;
;;;      eg.    2 1 530101 38.50 66 28 3 3 ? 2 5 4 4 ? ? ? 3 5 45.00 8.40 ? ? 2 2 11300 00000 00000 2
;;;             ...

#|
;;; Sample header created for horse-colic/horse-colic.data from information in horse-colic/horse-colic.names

REQUIRES_SURGERY
HORSE
(SURGERY (1 SURGERY) (2 OTHER)) (AGE (1 ADULT YOUNG)) HOSPITAL_NUMBER RECTAL_TEMPERATURE PULSE RESPIRATORY_RATE (EXTREMITY_TEMPERATURE (1 NORMAL) (2 WARM) (3 COOL) (4 COLD)) (PERIPHERAL_PULSE (1 NORMAL) (2 INCREASED) (3 REDUCED) (4 ABSENT)) (MUCOUS_MEMBRANES (1 PINK) (2 BRIGHT_PINK) (3 PALE_PINK) (4  PALE_CYANOTIC) (5  BRIGHT_RED) (6  DARK_CYANOTIC)) (CAPILLARY_REFILL_TIME (1 >3) (2  <=3)) (PAIN (1 NONE) (2 DEPRESSED) (3 INTERMITTENT_MILD_PAIN) (4 INTERMITTENT_SEVERE_PAIN) (5 CONTINUOUS_SEVERE_PAIN)) (PERISTALSIS (1 HYPERMOTILE) (2 NORMAL) (3 HYPOMOTILE) (4 ABSENT)) (ABDOMINAL_DISTENSION (1 NONE) (2 SLIGHT) (3 MODERATE) (4 SEVERE)) (NASOGASTRIC_TUBE (1 NONE) (2 SLIGHT) (3 SIGNIFICANT)) (NASOGASTRIC_REFLUX (1 NONE) (2 >1) (3 <1)) NASOGASTRIC_REFLUX_PH (RECTAL_EXAMINATION  (1 NORMAL) (2 INCREASED) (3 DECREASED) (4 ABSENT)) (ABDOMEN (1 NORMAL) (2 OTHER) (3 FIRM_FECES_IN_THE_LARGE_INTESTINE) (4 DISTENDED_SMALL_INTESTINE) (5 DISTENDED_LARGE_INTESTINE)) PACKED_CELL_VOLUME TOTAL_PROTEIN (ABDOMINOCENTESIS_APPEARANCE (1 CLEAR) (2 CLOUDY) (3 SEROSANGUINOUS)) ABDOMCENTESIS_TOTAL_PROTEIN (OUTCOME (1 LIVED) (2 DIED) (3 EUTHANIZED)) (SURGICAL_LESION (1 YES) (2 NO)) TYPE_OF_LESION_1 TYPE_OF_LESION_2 TYPE_OF_LESION_3 (CP_DATA (1 YES) (2 NO))
:SURGERY :AGE :HOSPITAL_NUMBER :RECTAL_TEMPERATURE :PULSE :RESPIRATORY_RATE :EXTREMITY_TEMPERATURE :PERIPHERAL_PULSE :MUCOUS_MEMBRANES :CAPILLARY_REFILL_TIME :PAIN :PERISTALSIS :ABDOMINAL_DISTENSION :NASOGASTRIC_TUBE :NASOGASTRIC_REFLUX :NASOGASTRIC_REFLUX_PH :RECTAL_EXAMINATION :ABDOMEN :PACKED_CELL_VOLUME :TOTAL_PROTEIN :ABDOMINOCENTESIS_APPEARANCE :ABDOMCENTESIS_TOTAL_PROTEIN :OUTCOME :SURGICAL_LESION :TYPE_OF_LESION_1 :TYPE_OF_LESION_2 :TYPE_OF_LESION_3 :CP_DATA
SURGICAL_LESION
1
?
2 1 530101 38.50 66 28 3 3 ? 2 5 4 4 ? ? ? 3 5 45.00 8.40 ? ? 2 2 11300 00000 00000 2
1 1 534817 39.2 88 20 ? ? 4 1 3 4 2 ? ? ? 4 2 50 85 2 2 3 2 02208 00000 00000 2 
2 1 530334 38.30 40 24 1 1 3 1 3 3 1 ? ? ? 1 1 33.00 6.70 ? ? 1 2 00000 00000 00000 1 
|#

#|
;;; Sample header created for mushroom/agaricus-lepiota.data from information in mushroom/agaricus-lepiota.names

EDIBLE-MUSHROOM
MUSHROOM
CLASS,CAP-SHAPE,CAP-SURFACE,CAP-COLOR,BRUISES,ODOR,GILL-ATTACHMENT,GILL-SPACING,GILL-SIZE,GILL-COLOR,STALK-SHAPE,STALK-ROOT,STALK-SURFACE-ABOVE-RING,STALK-SURFACE-BELOW-RING,STALK-COLOR-ABOVE-RING,STALK-COLOR-BELOW-RING,VEIL-TYPE,VEIL-COLOR,RING-NUMBER,RING-TYPE,SPORE-PRINT-COLOR,POPULATION,HABITAT
CLASS,CAP-SHAPE,CAP-SURFACE,CAP-COLOR,BRUISES,ODOR,GILL-ATTACHMENT,GILL-SPACING,GILL-SIZE,GILL-COLOR,STALK-SHAPE,STALK-ROOT,STALK-SURFACE-ABOVE-RING,STALK-SURFACE-BELOW-RING,STALK-COLOR-ABOVE-RING,STALK-COLOR-BELOW-RING,VEIL-TYPE,VEIL-COLOR,RING-NUMBER,RING-TYPE,SPORE-PRINT-COLOR,POPULATION,HABITAT
CLASS
e
?
p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u
e,x,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g
e,b,s,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m
|#



;;;
;;;  To run on under MCL2.0 type:
;;;     (convert-repository-to-focl #\space)
;;;     (convert-repository-to-focl #\space :focl-multiple-relation-low-arity-example-format)
;;;
;;;  delimiter                character used to delimit values in examples
;;;  convertion-preference    :focl-relational-format
;;;                           :focl-multiple-relation-low-arity-example-format
;;;                           :focl-single-relation-high-arity-example-format
;;;  surpress-missing-values
;;;  instance-start-index
;;;  input-path
;;;  output-path

(defun convert-repository-to-focl (&optional (delimiter #\,)
                                             (convertion-preference :focl-relational-format)
                                             (surpress-missing-values t)
                                             (instance-start-index 1)
                                             (input-path (choose-file-dialog :button-string "Convert"))
                                             (output-path (choose-new-file-dialog :prompt "File to contain FOCL data:")))
  (let* ((in-stream (open input-path :direction :input))
         (out-stream (if output-path (open output-path :direction :output :if-exists :overwrite) t))
         (example-name (first (read-data-line in-stream delimiter)))
         (instance-name (first (read-data-line in-stream delimiter)))
         (attribute-name-value-conversion-line (read-data-line in-stream delimiter))
         (attribute-names (real-attribute-names attribute-name-value-conversion-line))
         (value-conversion-info (value-conversion attribute-name-value-conversion-line))
         (attribute-types (read-data-line in-stream delimiter))
         (class-name (first (read-data-line in-stream delimiter)))
         (positive-indicators (read-data-line in-stream delimiter))
         (missing-value-indicators (read-data-line in-stream delimiter))
         (class-position (position class-name attribute-names)))
    
    (format t "~%~%CONVERTING...~%")
    
    (output-header out-stream input-path  delimiter convertion-preference surpress-missing-values instance-start-index attribute-names value-conversion-info
                   attribute-types class-name positive-indicators missing-value-indicators class-position)

    (case convertion-preference
      (:focl-relational-format
       (convert-repository-to-focl-relational-format in-stream
                                                     out-stream
                                                     example-name
                                                     attribute-names
                                                     value-conversion-info
                                                     attribute-types
                                                     class-name
                                                     positive-indicators
                                                     missing-value-indicators
                                                     class-position
                                                     delimiter
                                                     surpress-missing-values
                                                     instance-name
                                                     instance-start-index))
      (:focl-multiple-relation-low-arity-example-format
       (convert-repository-to-focl-multiple-relation-low-arity-example-format in-stream
                                                                              out-stream
                                                                              example-name
                                                                              attribute-names
                                                                              value-conversion-info
                                                                              attribute-types
                                                                              class-name
                                                                              positive-indicators
                                                                              missing-value-indicators
                                                                              class-position
                                                                              delimiter
                                                                              surpress-missing-values
                                                                              instance-name
                                                                              instance-start-index))
      (:focl-single-relation-high-arity-example-format
       (convert-repository-to-focl-single-relation-high-arity-example-format in-stream
                                                                             out-stream
                                                                             example-name
                                                                             attribute-names
                                                                             value-conversion-info
                                                                             attribute-types
                                                                             class-name
                                                                             positive-indicators
                                                                             missing-value-indicators
                                                                             class-position
                                                                             delimiter
                                                                             surpress-missing-values
                                                                             instance-name
                                                                             instance-start-index))
      )
    (close in-stream)
    (close out-stream))
  (format t "~%DONE~%")
  (values))

(defun convert-repository-to-focl-single-relation-high-arity-example-format (in-stream
                                                                             out-stream
                                                                             example-name
                                                                             attribute-names
                                                                             value-conversion-info
                                                                             attribute-types
                                                                             class-name
                                                                             positive-indicators
                                                                             missing-value-indicators
                                                                             class-position
                                                                             delimiter
                                                                             surpress-missing-values
                                                                             instance-name
                                                                             instance-start-index)
  instance-name instance-start-index class-name
  (when (and missing-value-indicators
             surpress-missing-values)
    (format t "~%Missing values can not be suppressed when converting to FOCL's single relation high arity example format."))
  (let* ((vars (mapcar #'(lambda (id) (intern (format nil "?~a" id))) (delete-position class-position attribute-names)))
         (types (delete-position class-position attribute-types))
         (conversion (delete-position class-position value-conversion-info)))
    (format out-stream "~%~%(def-pred ~S~%  :vars ~S~%  :type ~S~%  :induction nil)~%" example-name vars types)
    (do ((instance (read-data-line in-stream delimiter) (read-data-line in-stream delimiter)))
        ((null instance))
      (if (member (nth class-position instance) positive-indicators)
        (format out-stream "~%(def-example ~S~%  ~S)" example-name (mapcar #'convert-a-value (delete-position class-position instance) conversion))
        (format out-stream "~%(def-example (not ~S)~%  ~S)" example-name (mapcar #'convert-a-value (delete-position class-position instance) conversion))))))


(defun convert-repository-to-focl-multiple-relation-low-arity-example-format (in-stream
                                                                              out-stream
                                                                              example-name
                                                                              attribute-names
                                                                              value-conversion-info
                                                                              attribute-types
                                                                              class-name
                                                                              positive-indicators
                                                                              missing-value-indicators
                                                                              class-position
                                                                              delimiter
                                                                              surpress-missing-values
                                                                              instance-name
                                                                              instance-start-index)
  class-name
  (let* ((relations (delete-position class-position attribute-names))
         (types (delete-position class-position attribute-types))
         (conversion (delete-position class-position value-conversion-info)))
    (output-def-pred-arity-1 out-stream example-name nil nil)
    (do* ((rs relations (rest rs))
          (relation (first rs) (first rs))
          (ts types (rest ts))
          (type (first ts) (first ts)))
         ((null rs))
      (output-def-pred-arity-2 out-stream relation type nil nil))
    (do ((instance (read-data-line in-stream delimiter) (read-data-line in-stream delimiter))
         (instance-index instance-start-index (incf instance-index)))
        ((null instance))
      (if (member (nth class-position instance) positive-indicators)
        (format out-stream "~%(def-example ~S ~S_~S" example-name instance-name instance-index)
        (format out-stream "~%(def-example (not ~S) ~S_~S" example-name instance-name instance-index))
      (do* ((rs relations (rest rs))
            (relation (first rs) (first rs))
            (cs conversion (rest cs))
            (c (first cs) (first cs))
            (vs (delete-position class-position instance) (rest vs))
            (value (convert-a-value (first vs) c) (convert-a-value (first vs) c)))
           ((null rs))
        (unless (and surpress-missing-values
                     (member value missing-value-indicators))
          (format out-stream "~%   (~S ~S_~S ~S)" relation instance-name instance-index value)))
      (format out-stream ")~%"))))


(defun convert-repository-to-focl-relational-format (in-stream
                                                     out-stream
                                                     example-name
                                                     attribute-names
                                                     value-conversion-info
                                                     attribute-types
                                                     class-name
                                                     real-positive-indicators
                                                     missing-value-indicators
                                                     class-position
                                                     delimiter
                                                     surpress-missing-values
                                                     instance-name
                                                     instance-start-index)
  (let* ((relation-array (make-array (length attribute-names) :initial-element nil))
         (class-value-conversion (nth class-position value-conversion-info))
         (positive-indicators (mapcar #'(lambda (x) (convert-a-value x class-value-conversion)) real-positive-indicators))
         relation-index)
    (do ((instance (read-data-line in-stream delimiter) (read-data-line in-stream delimiter))
         (instance-count instance-start-index (incf instance-count)))
        ((null instance))
      (setf relation-index -1)
      (do* ((cs value-conversion-info (rest cs))
            (c (first cs) (first cs))
            (vs instance (rest vs))
            (value (convert-a-value (first vs) c) (convert-a-value (first vs) c)))
           ((null vs))
        (incf relation-index)
        (unless (and surpress-missing-values
                     (member value missing-value-indicators))
          (setf (aref relation-array relation-index) (push (list (format nil "~S_~S" instance-name instance-count) value) (aref relation-array relation-index))))))
    
    (do* ((names attribute-names (rest names))
          (name (first names) (first names))
          (types attribute-types (rest types))
          (type (first types) (first types))
          (relation-index 0 (incf relation-index)))
         ((null names))
      (if (eql name class-name)
        (let ((pos nil)
              (neg nil))
          (dolist (tuple (aref relation-array relation-index))
            (if (member (second tuple) positive-indicators)
              (setf pos (push (list (first tuple)) pos))
              (setf neg (push (list (first tuple)) neg))))
          (output-def-pred-arity-1 out-stream example-name pos neg))
        (output-def-pred-arity-2 out-stream name type (nreverse (aref relation-array relation-index)) nil)))))

(defun real-attribute-names (list)
  (mapcar #'(lambda (item) (if (consp item) (first item) item)) list))

(defun value-conversion (list)
  (mapcar #'(lambda (item) (if (consp item) (rest item) nil)) list))

(defun convert-a-value (original value-alist)
  (if value-alist
    (let ((bucket (assoc original value-alist)))
      (if bucket
        (second bucket)
        original))
    original))
      
(defun read-data-line (stream delimiter)
  (let ((line (read-line stream nil nil))
        (items nil)
        (item nil)
        (start 0))
    (when line
      (unless (eql delimiter #\space) (setf line (nsubstitute #\_ #\space line)))
      (setf line (nsubstitute #\space delimiter line))
      (do ()
          ((eql item :eof) (nreverse items))
        (multiple-value-setq (item start) (read-from-string line nil :eof :start start))
        (unless (eq item :eof) (push item items))))))

(defun delete-position (position list)
  (cond ((= position 0) (rest list))
        ((> position 0)
         (let ((temp (nthcdr (- position 1) list)))
           (when temp
             (rplacd temp (rest (rest temp))))
           list))
        (t list)))

(defun output-def-pred-arity-1 (stream name pos neg)
  (format stream "~%~%(def-pred ~A~%" name)
  (format stream "  :vars  (?Example)~%")
  (format stream "  :type  (:example)~%")
  (format stream "  :mode  (:+)~%")
  (when pos (format stream "  :pos   ~S~%" pos))
  (when neg (format stream "  :neg   ~S~%" neg))
  (format stream "  :induction   nil~%")
  (format stream "  :constraint  nil~%")
  (format stream "  :commutative nil~%")
  (format stream "  :questions ( (:FACT (?Example is a positive example of ~A))~%" name)
  (format stream "               (:QUESTION (is ?Example a positive example of ~A)) ) )~%" name))


(defun output-def-pred-arity-2 (stream name type pos neg)
  (format stream "~%~%(def-pred ~A~%" name)
  (format stream "  :vars  (?Example ?Value)~%")
  (format stream "  :type  (:example ~(~S~))~%" type)
  (format stream "  :mode  (:+ :?)~%")
  (when pos (format stream "  :pos   ~S~%" pos))
  (when neg (format stream "  :neg   ~S~%" neg))
  (format stream "  :induction   t~%")
  (format stream "  :constraint  nil~%")
  (format stream "  :commutative nil~%")
  (format stream "  :questions ( (:FACT (the value of ~A for ?Example is ?Value))~%" name)
  (format stream "               (:QUESTION (is the value of ~A for ?Example ?Value))~%" name)
  (format stream "               (?Value ((?Example) (what is the value of ~A for ?Example) :SINGLE-VALUED)) ) )~%" name))


(defun output-header (out-stream input-path delimiter convertion-preference surpress-missing-values instance-start-index
                      attribute-names value-conversion-info attribute-types class-name positive-indicators missing-value-indicators class-position)
  (format out-stream "~%;;; Output from repository-to-focl-translator")
  (format out-stream "~%;;;~%;;; Input File :   ~A" (pathname-name input-path))
  (let ((type (pathname-type input-path)))
      (if (or (equalp type :UNSPECIFIC) (null type))
        (format out-stream "~%" type)
        (format out-stream ".~A~%" type)))
  (format out-stream "~%;;; Delimiter  :   \"~A\"" delimiter)
  (format out-stream "~%;;; Convertion Preference :    ~S" convertion-preference)
  (format out-stream "~%;;; Surpress Missing Values :  ~A" surpress-missing-values)
  (format out-stream "~%;;; Instance Start Index :     ~A" instance-start-index)
  (format out-stream "~%;;;~%;;; Class Name :   ~A  [position ~A]" class-name class-position)
  (format out-stream "~%;;;    Positive Indicators :      ~S" positive-indicators)
  (format out-stream "~%;;;    Missing Value Indicators : ~{~S~}" missing-value-indicators)
  (format out-stream "~%;;;~%;;;  Attribute Information:~%;;;")
  (format out-stream "~%;;;     NAME~40TTYPE~72TVALUE CONVERSION INFORMATION")
  (do ((as attribute-names (rest as))
       (ts attribute-types (rest ts))
       (cs value-conversion-info (rest cs)))
      ((null as))
    (format out-stream "~%;;;     ~(~S~)~40T~S~72T~{~S ~}" (first as) (first ts) (first cs))))


#|
;;;_______________________________________
;;; CONVERT-REAL-PROLOG-TO-FOCL-PROLOG

(defun read-prolog-conjunction (real-prolog-string)
     (convert-prolog-list-into-focl-list (read-string-into-list (substitute #\space #\, (remove #\. real-prolog-string)))))

(defun read-string-into-list (string)
  (let ((start 0)
        (item nil)
        (list nil))
    (do ()
        ((eq item :eof) (nreverse list))
      (multiple-value-setq (item start) (read-from-string string nil :eof :start start))
      (unless (eq item :eof)
        (push item list)))))

(defun convert-prolog-list-into-focl-list (prolog-list)
  (when prolog-list
    (let ((relation (first prolog-list))
          (parameters (second prolog-list))
          (remaining-literals (rest (rest prolog-list))))
      (cond ((eq relation '!)  (cons '! (convert-prolog-list-into-focl-list (rest prolog-list))))
            ((or (eq relation 'and)
                 (eq relation 'or)
                 (eq relation 'not)) (cons
                                      (cons relation
                                            (convert-prolog-list-into-focl-list parameters))
                                      (convert-prolog-list-into-focl-list remaining-literals)))
            ((eq relation 'is) (cons
                                (cons relation
                                      (cons (first parameters)
                                            (convert-prolog-expression-list-into-lisp-expression-list (rest parameters))))
                                (convert-prolog-list-into-focl-list remaining-literals)))
            (t (cons
                (cons relation parameters)
                (convert-prolog-list-into-focl-list remaining-literals)))))))

(defun convert-prolog-expression-list-into-lisp-expression-list (expression)
  (if (consp expression)
    (let ((lisp-args nil))
      (do* ((args expression rest)
            (arg (first args) (first args))
            (next (second args) (second args))
            (rest (rest args) (rest args)))
           ((null args) (nreverse lisp-args))
        (cond ((consp next)
               (push (cons arg (convert-prolog-expression-list-into-lisp-expression-list next)) lisp-args)
               (setf rest (rest rest)))
              (t (push arg lisp-args)))))
    expression))
|#