;;;-----------------------------------------------------------------------;
;;;  -*- Syntax: Common-Lisp; Package: USER; Base: 10; Mode: LISP -*-
;;;-----------------------------------------------------------------------;
;;;
;;; Last Modified                               6-Dec-95, 17:58
;;;
;;;         File: phrase-grammar-callouts.lisp
;;; File created: 
;;;      Package: USER
;;;       Author: Alex Franz [amf@cs.cmu.edu]
;;;   
;;; File Description: Lisp callouts for phrase recognizer
;;;
;;;     See also: other phrase code
;;; 
;;;-----------------------------------------------------------------------;
;;;
;;; Change Log:
;;; -----------
;;;
;;;  8-Jun-93 by amf: made some fixes
;;;
;;;-----------------------------------------------------------------------;



;;;-----------------------------------------------------------------------;
;;; Documentation:                                                        ;
;;;-----------------------------------------------------------------------;


;;;-----------------------------------------------------------------------;
;;; Package Statements:                                                   ;
;;;-----------------------------------------------------------------------;

(in-package 'user)
;;; (use-package 'meister)

;;;-----------------------------------------------------------------------;
;;; Structures etc:                                                       ;
;;;-----------------------------------------------------------------------;


;;;-----------------------------------------------------------------------;
;;; Global Variables:                                                     ;
;;;-----------------------------------------------------------------------;

;; set up in dmk-synlex-loader.lisp
(defvar *eng-phrec*) 

;;; defined in dmk-defs.lisp
(defvar *empty-dmk-entry*)


;; turn this on when all -ing, -ed forms are in DMK
(defvar *strict-phrase-checking*)
;;; 12-Jan-95 by amf: turned this on.
(setq *strict-phrase-checking* T)

;;;-----------------------------------------------------------------------;
;;; Macros:                                                               ;
;;;-----------------------------------------------------------------------;


;;;-----------------------------------------------------------------------;
;;; Functions:                                                            ;
;;;-----------------------------------------------------------------------;

(defun or-fs-p (fs)
  "does fs have an *OR* at top level?"
  (and (listp fs)
       fs
       (eq (first fs) '*OR*)))
      

(defun call-phrec-step (phrase-fs word-fs &key (use-ortho nil))
  "multiply out *or*, then call real function"
  (let ((phrase-or (or-fs-p phrase-fs))
	(word-or (or-fs-p word-fs))
	result result-fs)

    ;; optimize for no-*or* case
    (unless (or phrase-or
		word-or)
	(return-from call-phrec-step
	  (call-phrec-step-no-or phrase-fs word-fs use-ortho)))

    (dolist (phrase (if phrase-or
			(cdr phrase-fs) ; then
			(list phrase-fs))) ; else
      (dolist (word (if word-or
			(cdr word-fs) ; then
			(list word-fs))) ; else
	
	(if (setq result-fs (call-phrec-step-no-or phrase word use-ortho))
	    (setq result (cons result-fs result)))))
    
    (if (= (length result) 1)
	(values (car result)) ; then
	(values (cons '*OR* result)))))

;; 28-Nov-95 by EHN -- Modified CALL-PHREC-STEP-NO-OR to preserve token
;; numbers for non-head tokens (part of tokenizer/highlighting fix for
;; Analyzer 1.8.1)

(defun call-phrec-step-no-or (phrase-fs word-fs use-ortho)
  "called by grammar rule that tries to add a word to a phrase"
  (let ((phrec-state (second (assoc 'phrec-state phrase-fs)))
	word-to-add arc  phrec-level phrec-head-fs phrec-key
	other-slots new-state word-has-ortho-but-used-root phrec-phrase
	ortho-to-add)

    (setq word-to-add (if use-ortho
			  (second (assoc 'ortho word-fs)) ; then
			  (second (assoc 'root word-fs)))) ; else
    (setf ortho-to-add (or (second (assoc 'ortho word-fs))
			   (second (assoc 'root word-fs))))
    ;; this is disallowed on anything but heads
    (if (and (not use-ortho)
	     (assoc 'ortho word-fs))
	(setq word-has-ortho-but-used-root T))
    (unless phrec-state
      (if *analyzer-show-warnings*
	  (format *standard-output*
		  "~&[Phrec] WARNING: Could not find phrec-state after word ~S ~%"
		  word-to-add))
      (return-from call-phrec-step-no-or NIL))

    (setf phrec-phrase (concatenate 'string (second (assoc 'phrec-phrase phrase-fs)) " " ortho-to-add))

    (unless word-to-add			; ortho was T, and
					;didn't have an ortho slot
      (return-from call-phrec-step-no-or NIL))

    (when (assoc 'phrec-phrase phrase-fs)
	(setf phrase-fs (remove (assoc 'phrec-phrase phrase-fs) phrase-fs :test #'equal))
	(push `(phrec-phrase ,phrec-phrase) phrase-fs))
      
    (setq arc (phrec-step *eng-phrec* phrec-state word-to-add))
    (unless arc (return-from call-phrec-step-no-or NIL))
    (setq new-state (phrec-next-state arc))
    (setq phrec-key (phrec-key arc))


    ;; take the phrase fs apart
    (dolist (slot phrase-fs)
      (cond ((eq (car slot) 'phrec-level)
	     (setq phrec-level (1+ (second slot))))
	    ((eq (car slot) 'phrec-head)
	     (setq phrec-head-fs (second slot)))
	    ((eq (car slot) 'phrasal))	; do nothing - remove this slot
	    ((eq (car slot) 'phrec-state))	; do nothing - remove this slot
	    ((eq (car slot) 'phrec-phrase))
	    (t				; some other slot -- keep it around
					; (but there shouldn't be anything?!)
	     (push slot other-slots))))

    ;; 28-Nov-95 by EHN - trace
    ;;(format t "~%OTHER-SLOTS: ~s" other-slots)

    ;; if word was inflected, but is not head, bug out
    ;; (if -ed or -ing not in DMK, then need to test for those)
    (if (and (not (phrec-headp arc))
	     word-has-ortho-but-used-root)
	(return-from call-phrec-step-no-or NIL))
    
    ;; add word fs, if head
    (if (phrec-headp arc)
	(setq phrec-head-fs
	      (cons (list (intern (format nil "HEAD~A" phrec-level))
			  (append
			   (if word-has-ortho-but-used-root
			       (list (list 'word-inflected '+)) ; then
			       NIL) ; else
			   word-fs))
		    phrec-head-fs))
      )
    
    ;; 28-Nov-95 by EHN - Trace
    ;; (format t "~%PHREC-HEAD-FS: ~s" phrec-head-fs)

    ;; 28-Nov-95 by EHN - add token at the top level.
    (let ((tok (assoc 'token word-fs)))
      (when tok
	(setq other-slots (cons tok other-slots))))

    ;; build new phrase fs
    (values
     (append
      (if phrec-key
	  (list (list 'phrasal phrec-key ))	; then
	  NIL)				; else
      (if phrec-phrase
	  (list (list 'phrec-phrase phrec-phrase))
	  NIL)
      (if phrec-head-fs
	  (list (list 'phrec-head phrec-head-fs)) ; then
	  NIL)				; else	  
      (cons (list 'phrec-level phrec-level)
	    (cons (list 'phrec-state new-state)
		  other-slots))))))

(defun phrec-clean-up-phrase (fs)
  (let (result)
    (cond ((or-fs-p fs)
	   (dolist (single-fs (cdr fs))
	     (setq result (append
			   result (phrec-clean-up-phrase-no-or single-fs)))))
	  (t (setq result (phrec-clean-up-phrase-no-or fs))))

    (setq result (remove-if #'null result))
    (setq result (remove-duplicates result :test #'equal))
    ;;  8-Feb-94 by amf:
    ;; why was this commented out?!
    (setq result (remove-redundant-heads result))
    (if (= (length result) 1)
	(setq result (car result)) ; then
	(setq result (cons '*OR* result))) ; else
    (values result)))

;; ----------------------------------------------------------------------
;; 28-Nov-95 by EHN -- put multiple TOKEN slots into a *MULTIPLE*.
;;  6-Dec-95 by EHN -- how about *OR*? lets existing gra rules that
;; disambig head token to work undisturbed?
;;  6-Dec-95 by EHN -- how about separate head-token slot?

(defun phrec-bundle-tokens (fs)
  (let (tokens head-token)
    (dolist (slot fs)
      (when (eq 'token (first slot))
	     (setq tokens (cons (second slot) tokens))))
    (cond ((null tokens)
	   (values nil nil))
	  ((> (length tokens) 1)
	   (setq tokens (sort tokens #'>))
	   (setq head-token (list 'head-token (first tokens)))
	   (setq tokens (list 'token (cons '*multiple* tokens)))
	   (values head-token tokens))
	  (t (setq head-token (list 'head-token (first tokens)))
	     (setq tokens (cons 'token tokens))
	     (values head-token tokens)))))
    

;;;------------------------------------------------------------------;
;;; remove-unique-redundant-heads reduces the list pf recognized phrases 
;;; based on the cat of the phrase, cat of head, etc.

;;; pjordan modified to base reduction on cat and sem of the phrase, cat
;;; of head, etc.

(defun remove-redundant-heads (fs-list)
  (let (unique-head-list)
    (dolist (fs fs-list)
      (let ((unique-fs-with-same-head
	     (find (list (second (assoc 'SEM fs))
			 (second (assoc 'CAT fs)))
		   unique-head-list
		   :test #'equal
		   :key #'(lambda (fs) (list (second (assoc 'SEM fs))
					     (second (assoc 'CAT fs)))))))
	(cond ((not unique-fs-with-same-head)
	       ;; first phrase with this cat
	       (push fs unique-head-list))
	      ((not (eq
		     (second (assoc 'CAT unique-fs-with-same-head))
		     (second (assoc 'HEAD-FS-CAT unique-fs-with-same-head))))
	       ;; head of unique fs does not have same cat as phrase
	       (setq unique-head-list
		     (remove unique-fs-with-same-head unique-head-list
			     :test #'equal))
	       (push fs unique-head-list))

	      (;; if it's a verb, don't remove if different FORM
	       (eq (second (assoc 'CAT fs)) `V)
	       (let ((old-form (assoc 'FORM unique-fs-with-same-head))
		     (new-form (assoc 'FORM fs)))
		 (unless (equal old-form new-form)
		   (push fs unique-head-list))))
	      )))
    (values unique-head-list)))
	       

(defun remove-redundant-v-head (list)
  "filter out N phrases with V head if there is also a N head"
  (let ((n-head-phrase (find-if #'(lambda (fs)
				    (and (eq (second (assoc 'cat fs)) 'N)
					 (eq (second (assoc 'head-fs-cat fs)) 'N)))
				list))
	v-head-phrases)

    ;; collect all phrases with V head
    (dolist (fs list)
      (if (and
	   (eq (second (assoc 'cat fs)) 'N)
	   (eq (second (assoc 'head-fs-cat fs)) 'V))
	  (push fs v-head-phrases)))

    ;; remove all phrases with v heads
    (dolist (fs v-head-phrases)
      (setq list (remove fs list :test #'equal)))
    (values list)))
					  

(defun phrec-clean-up-phrase-no-or (fs)
  "cleans up phrasal f-structure to remove trie crud"
  (let* ((phrec-key (second (assoc 'phrasal fs)))
	 (phrec-head (second (assoc 'phrec-head fs)))	 
	 (phrec-phrase (when (second (assoc 'phrec-phrase fs))
			 (string-left-trim " " (second (assoc 'phrec-phrase fs)))))
	 (dmk-entries (arcval-dmk-entries (phrec-query
					   *eng-phrec* phrec-key)))
	 (phrase-last-token (second (assoc 'phrase-last-token fs)))
	 result head-toknum phrase-toknums phrase-tokens phrase-end-token entry)

    ;; 28-Nov-95 by EHN -- bundle multiple tokens together.
    (multiple-value-setq (head-toknum phrase-toknums)
      (phrec-bundle-tokens fs))
    ;;(format t "~%PHRASE-TOKNUMS: ~s" phrase-toknums)

    (setf phrec-phrase
	  (replace-string "" "{/idiom}"
			  (replace-string "" "{idiom}" phrec-phrase)))
    (setf phrec-phrase
	  (string-right-trim " " (string-left-trim " " phrec-phrase)))
    (setf phrase-tokens (preprocess-word-string phrec-phrase :tokenize-final-period NIL))
    (setf phrase-tokens (remove (first (last phrase-tokens)) phrase-tokens))
    ;; 12-Dec-94 by amf: 
    ;; this will always return first instance of a phrase.
    ;; why was this done?
;    (setf phrase-end-token (+ (length phrase-tokens)
;                              (search phrase-tokens
;                                      (capitalize *parser-token-list* nil)
;                                      :test #'equal)))
    (setq phrase-end-token phrase-last-token)
    (dolist (dmk-entry dmk-entries)
      (setq entry (phrec-clean-up-phrase-single-dmk-entry
			  phrec-key phrec-head phrec-phrase phrase-end-token dmk-entry))
      (let ((existing (assoc 'token entry)))
	(when existing
	  ;;(format t "~%Replacing  ~s with ~s..." existing phrase-toknums)
	  (rplacd existing (copy-tree (rest phrase-toknums)))))
      (setq entry (cons head-toknum entry))
      (setq result (push entry result)))
    
    (values result)))



(defun phrec-clean-up-phrase-single-dmk-entry (phrec-key
					       phrec-head
					       phrec-phrase
					       phrase-end-token
					       dmk-entry)
  (let (actual-phrase-head head-fs phrase-syn-features phrase-sem
			   syn-features-from-dmk-entry fixed-not-slot
			   head-fs-cat 
			   other-head-fs head-fs-slot)

    ;; find actual phrase head
    (cond (dmk-entry
	   (setq actual-phrase-head (dmk-phrase-head dmk-entry)))
	  (t
	   (if *analyzer-show-warnings*
	       (format *standard-output*
		       "~&[Phrec] WARNING: Could not find DMK entry for phrase ~S ~%"
		       phrec-key))
	   (setq actual-phrase-head NIL)))

    ;; get f-structure for actual phrase head
    (cond ((and actual-phrase-head
		(not (eq actual-phrase-head :NONE)))
	   (setq head-fs-slot (assoc (intern (format nil "HEAD~A" actual-phrase-head))
				     phrec-head))
	   (setq head-fs (second head-fs-slot))
	   (setq other-head-fs (remove head-fs-slot phrec-head :test #'equal)))
	  (t				; phrase does not have head
	   (setq other-head-fs phrec-head)))

    (if (other-heads-have-morphology other-head-fs)
	(return-from phrec-clean-up-phrase-single-dmk-entry NIL))
    
    (unless head-fs
      (if (and *analyzer-show-warnings*
	       *phrec-show-head-fs-warning*
	       (not (eq actual-phrase-head :NONE)))
	  (format *standard-output*
		  "~&[Phrec] WARNING: Could not find head fs for phrase ~S ~%"
		  phrec-key)))

    ;; make sure we don't error out later
    (unless dmk-entry (setq dmk-entry *empty-dmk-entry*))

    ;; now, take f-structure from head word,
    ;; change ROOT to phrase root,
    ;; test CAT against cat indicated in phrase DMK entry,
    ;; and override syn features from head word fs with syn features
    ;; from phrase entry.

    ;; But, if a syn feature from the phrase dmk entry has a :NOT
    ;; in it, then try to see if it clashes with the same slot in 
    ;; the head fs. If it clashes, bug out. If no, change the slot to
    ;; something without :NOT.
    (setq syn-features-from-dmk-entry
	  (dmk-syn-features dmk-entry))
    (dolist (synfeat syn-features-from-dmk-entry)
      (when (feat-has-not-p synfeat)
	(setq fixed-not-slot (fix-not-slot synfeat head-fs))
	;; bug out if the :NOT slot failed to unify against
	;; the head f-structure feature
	(if (eq fixed-not-slot *tut-fail-value*)
	    (return-from phrec-clean-up-phrase-single-dmk-entry NIL))
	
	(setq syn-features-from-dmk-entry
	      (cons fixed-not-slot
		    (remove synfeat syn-features-from-dmk-entry)))))
    
    (setq phrase-syn-features syn-features-from-dmk-entry)
    ;; override syn features from  head with syn features from phrase

    ;; pjordan added to provide SEM feature for phrases without
    ;; a specified head or that don't have a sem feature which
    ;; is a problem if the head is an ordinal

    (when (or (not head-fs)
	      (not (assoc 'SEM head-fs)))
      (setq phrase-syn-features
	    (cons `(SEM ,(dmk-concept dmk-entry)) phrase-syn-features)))
    
    (dolist (slot head-fs)
      (cond
       ;; slot in phrasal syn-features -- do nothing
       ((assoc (car slot) phrase-syn-features))
       ;; ignore ROOT slot
       ((eq (car slot) 'ROOT))
       ;; ignore TOKEN slot
       ((eq (car slot) 'TOKEN)) 
       ;; test CAT slot to make sure the morphology of the head is
       ;; appropriate for the DMK POS, i.e. head has to have same POS
       ;; as phrasal DMK entry. Can't do this, because -ing heads in
       ;; phrasal nouns might be verb. Eventually, need to check whether
       ;; head word has DMK entry with same POS as phrase.
       ;; If so, reject other POS. Otherwise, do nothing
       ;; strict test: pos of phrasal DMK entry and pos of head fs
       ;; must be same.
       ;; (but if -ing not in DMK as nouns, need to test for those
       ;; especially).
	
       ((eq (car slot) 'CAT)
	(setq head-fs-cat (second slot))
	(if (and
	     *strict-phrase-checking*
	     (not (eq head-fs-cat (dmk-pos dmk-entry)))
	     ;; Prop, N, SYM, UNIT are all interchangeable
	     (not (and (member (dmk-pos dmk-entry) '(PROP N SYM UNIT))
		       (member head-fs-cat '(PROP N SYM UNIT))))
	     )
	    ;; THEN:
	    ;; the CAT of head fs is not equal to CAT of phrasal DMK entry
	    (return-from phrec-clean-up-phrase-single-dmk-entry NIL)))
       ((eq (car slot) 'SEM);;28-Sep-93 by pjordan update concept to phrase concept
	(setq phrase-syn-features (cons `(SEM ,(dmk-concept dmk-entry)) phrase-syn-features)))
       (t				; slot not in phrasal syn-features -- copy it over
	(setq phrase-syn-features (cons slot phrase-syn-features)))))
    ;; add root, cat, orthographic phrase and token number for the last word in the phrase
    (setq phrase-syn-features
	  (cons (list 'ROOT phrec-key)
		(cons (list 'PHRASE phrec-phrase)
		      (cons (list 'TOKEN phrase-end-token)
			    (cons (list 'CAT (dmk-pos dmk-entry))
				  (cons (list 'HEAD-FS-CAT (or head-fs-cat
							       'NONE))
					phrase-syn-features))))))

    ;; return properly constructed phrase fs
    (values phrase-syn-features)))
    
(defun feat-has-not-p (feat)
  (and (listp feat)
       (listp (second feat))
       (eq (car (second feat)) :NOT)))

(defun fix-not-slot (synfeat f-structure)
  (let* ((not-feat (car synfeat))
	 (not-value (second synfeat))
	 (f-struc-not-slot (assoc not-feat f-structure))
	 (*tut-operator-style* :keyword)
	 f-struc-value unif-result)
    (cond (f-struc-not-slot
	   (setq f-struc-value (second f-struc-not-slot))
	   (if (and (listp f-struc-value)
		    (eq '*OR* (car f-struc-value)))
	       (setq f-struc-value (cons :OR (cdr f-struc-value))))
	   (setq unif-result (tree-unify not-value f-struc-value))
	   (if (eq unif-result *tut-fail-value*)
	       (return-from fix-not-slot *tut-fail-value*))
	   (if (and (listp unif-result)
		    (eq (car unif-result) :OR))
	       (setq unif-result (cons '*OR* (cdr unif-result))))
	   (return-from fix-not-slot (list not-feat unif-result)))
	  ((eq not-feat 'NUMBER)
	   (if (and (listp not-value)
		    (> (length not-value) 1)
		    (eq (second not-value) 'PL))
	       (return-from fix-not-slot (list 'NUMBER 'SG))
	       (return-from fix-not-slot (list 'NUMBER 'PL))))
	  (t
	   (return-from fix-not-slot
	     (list 'phrase-not-slot-discarded (list synfeat)))))))
	       
(defun word-fs-is-ing-verb (fs)
  (let (cat-slot cat-value ortho-slot ortho-value ortho-len)
    (and (listp fs)
	 (setq cat-slot (assoc 'cat fs))
	 (listp cat-slot)
	 (setq cat-value (second cat-slot))
	 (eq cat-value 'V)
	 (setq ortho-slot (assoc 'ortho fs))
	 (listp ortho-slot)
	 (setq ortho-value (second ortho-slot))
	 (setq ortho-len (length ortho-value))
	 (> ortho-len 3)
	 (string= ortho-value "ing" :start1 (- ortho-len 3)))))

;;; fs-list is:
;;;
;;;    ((HEAD2
;;;        ((ROOT "position") (CAT V) (VALENCY TRANS)))
;;;     (HEAD1
;;;        ((WORD-INFLECTED +) (ROOT "lock") (ORTHO "locked")
;;;         (CAT V)
;;;         (VALENCY (*OR* TRANS INTRANS))
;;;         (FORM (*OR* PAST PASTPART)))))))
;;;
;;; the real head has been removed from this list. Check the other
;;; ones for inflection.

(defun other-heads-have-morphology (fs-list)
  (dolist (head-fs fs-list)
    (if (assoc 'word-inflected (second head-fs))
	(return-from other-heads-have-morphology T))))
