;;;==================================================================;
;;;  -*- Syntax: Common-Lisp; Package: USER; Base: 10; Mode: LISP -*-
;;;==================================================================;
;;;
;;;               Center for Machine Translation
;;;                 Carnegie-Mellon University
;;;                                                                       
;;;------------------------------------------------------------------;
;;;                                                                       
;;; Copyright (c) 1995
;;; Carnegie Mellon University. All Rights Reserved.                      
;;;                                                                       
;;;------------------------------------------------------------------;
;;;
;;;          File: grammar-callouts.lisp
;;;  File created: 16-Apr-93 by amf
;;;        Author: Alex Franz [amf@cs.cmu.edu]
;;; Last Modified:                       7-Mar-96, 16:39-May-95 at 13:03
;;;   
;;;------------------------------------------------------------------;
;;; File Description:
;;;
;;; Miscellaneous grammar callouts.
;;;
;;; Phrase grammar callouts are in separate file.
;;; 
;;; See also: phrase-grammar-callouts.lisp

;;;==================================================================;
;;; Documentation

;;;==================================================================;
;;; Change Log
;;;

;;;==================================================================;

;;; Package Statements

(in-package :user)


;;;==================================================================;

;;; Configuration

(proclaim '(special *disambiguator-non-string-roots*))


;;;==================================================================;

;;; Global Variables

(defvar *noun-noun-compounding-allowed*)
(setq *noun-noun-compounding-allowed* nil)

(defun with-nn ()
  (setq *noun-noun-compounding-allowed* t))

(defun without-nn ()
  (setq *noun-noun-compounding-allowed* nil))


;; 23-Jul-96 by EHN -- Obsolete.
;; 
;; (defvar *phrasal-score-filter*)
;; (setq *phrasal-score-filter* nil)
;; 
;; (defun with-filter ()
;;   (setq *phrasal-score-filter* t))
;; 
;; (defun without-filter ()
;;   (setq *phrasal-score-filter* nil))


;;;==================================================================;

;;; Utility functions

(defun replace-string (new-string old-string string)
  "Replaces every occurrence of OLD-STRING in STRING with NEW-STRING.  If
OLD-STRING does not occur in STRING, returns STRING; otherwise creates and
returns a new string.  Also returns second value: T iff replacements were made."
  (let ((new-len (length new-string))
	(old-len (length old-string))
	(s-len (length string))
	(s-pos 0)
	(positions nil)
	(pos-len 0)
	(result string)
	(r-pos 0))
    (if (zerop old-len)
	string
      (progn
	;; Find all instances of OLD-STRING in STRING
	(loop
	 (if (setf s-pos (search old-string string
				 :start2 s-pos :test #'string=))
	     (progn
	       (push s-pos positions)
	       (incf s-pos old-len))
	   (return)))

	;; If OLD-STRING was found...
	(when positions

	  ;; Allocate a result string of the correct length
	  (setf positions (nreverse positions)
		pos-len (length positions)
		result (make-string (+ (- s-len (* pos-len old-len))
				       (* pos-len new-len))
				    :initial-element #\Space))

	  ;; Fill the result string
	  (setf s-pos 0)
	  (loop
	   (when (>= s-pos s-len)
	     (return))
	   (cond ((and positions
		       (= s-pos (first positions)))
		  (replace result new-string :start1 r-pos)
		  (incf s-pos old-len)
		  (incf r-pos new-len)
		  (pop positions))
		 (t
		  (setf (schar result r-pos) (schar string s-pos))
		  (incf s-pos)
		  (incf r-pos)))))

	(values result (plusp pos-len))))))




;;;==================================================================;

;;; Symbol concatenation functions
;;;
;;; Used in rules like the following:
;;; 
;;; String of one token:
;;;
;;; (<string> <== (%)
;;;           (((x0 value) <= (copy-symbol-name (x2 value)))))
;;;
;;; String of multiple tokens:
;;;
;;; (<string-in> <== (<left-boundary>)
;;;               ())
;;;
;;; (<string-in> <== (<string-in>  %)
;;;              (((x2 value) = (*not* <right-boundary>))
;;;               ((x0 tokens) <= (cons (x2 value) (x1 tokens)))))
;;;
;;; (<string> <== (<string-in> <right-boundary>)
;;;           (((x0 value) <= (reverse-concatenate-symbols-spaced (x1 tokens)))))


(defun copy-symbol-name (symbol)
  "Copies the string name of SYMBOL."
  (copy-seq (symbol-name symbol)))


(defun concatenate-symbols-spaced (symbols)
  (let ((i 0)
	string
	new-string)
    ;; Figure out the length
    (do ((ss symbols (rest ss))
	 (first t nil))
	((endp ss))
      ;; Add room for a space
      (unless first
	(incf i))
      (incf i (length (symbol-name (first ss)))))
    ;; Allocate it
    (setf new-string (make-string i)
	  i 0)
    ;; Then fill it
    (do ((ss symbols (rest ss))
	 (first t nil))
	((endp ss))
      ;; Add a space
      (unless first
	(setf (schar new-string i) #\Space)
	(incf i))
      ;; Copy the string
      (setf string (symbol-name (first ss)))
      (replace new-string string :start1 i)
      (incf i (length string)))
    new-string))

(defun reverse-concatenate-symbols-spaced (symbols)
  (concatenate-symbols-spaced (nreverse symbols)))


;;;==================================================================;

;;; Check for word-final 's'

(defun word-final-s (object)
  (let ((string (string object)))
    (and (plusp (length string))
	 (char-equal #\s (schar string (1- (length string)))))))


;;;==================================================================;


(defun get-ing-forms (fs)
  (let (result fs-list)
    (cond ((or-fs-p  fs)
	   (setq fs-list (cdr fs)))
	  (t ; not an or
	   (setq fs-list (list fs))))
    (setq result
	  (remove-if-not #'word-fs-ing-form-p fs-list))
    (cond ((second result)
	   (values (cons '*OR* result)))
	  (t ; only one result
	   (values (car result))))))
	   
;;modified by pjordan
;;added test to check for non-ing words that end in "ing" (e.g. swing)
;;changed to expect a list to be returned from eng-dmk-entry-cat
(defun word-fs-ing-form-p (fs)
  
  "is this an fs for an -ing string that could be analyzed as the
   present participle of a verb?"
  
  (let (root-slot root-value root-len ortho-slot ortho-value ortho-len entries)
    (and (listp fs)
	 (or
	  (and
	   (setq ortho-slot (assoc 'ortho fs))
	   (listp ortho-slot)
	   (setq ortho-value (second ortho-slot))
	   (stringp ortho-value)
	   (setq ortho-len (length ortho-value))
	   (> ortho-len 4)
	   (string= ortho-value "ing" :start1 (- ortho-len 3))
	   ;must allow for dropped -e "increase" -> "increasing"
	   ;must allow for doubling "run" -> "running"
	   (setq entries (or (eng-dmk-entry-cat (subseq ortho-value 0 (- ortho-len 3)) 'V)
			     (eng-dmk-entry-cat (concatenate 'string
						 (subseq ortho-value 0 (- ortho-len 3))
						 "e") 'V)
			     (eng-dmk-entry-cat (subseq ortho-value 0 (- ortho-len 4)) 'V)))

	   ;;sometimes there is an entry which is an abbrev. (eg. "sw"
	   ;;entry must handle for tests on "swing")

	   (dolist (entry entries nil)
	     (when 
		 (not (eq '+ (second (assoc 'abb (dmk-syn-features entry)))))
	       (return t))))
	  
	  (and 
	   (setq root-slot (assoc 'root fs))
	   (listp root-slot)
	   (setq root-value (second root-slot))
	   (stringp root-value)	   
	   (setq root-len (length root-value))
	   (> root-len 4) ;pjordan changed from 3 to 4 for cases like "ring"
	   (string= root-value "ing" :start1 (- root-len 3))
	   ;must allow for dropped -e "increase" -> "increasing"
	   ;must allow for doubling "run" -> "running"
	   (setq entries (or (eng-dmk-entry-cat (subseq root-value 0 (- root-len 3)) 'V)
			     (eng-dmk-entry-cat (concatenate 'string
						 (subseq root-value 0 (- root-len 3))
						 "e") 'V)
			     (eng-dmk-entry-cat (subseq root-value 0 (- root-len 4)) 'V)))
	   
	   ;;sometimes there is an entry which is an abbrev. (eg. "sw"
	   ;;entry must handle for tests on "swing")

	   (dolist (entry entries nil)
	     (when 
		 (not (eq '+ (second (assoc 'abb (dmk-syn-features entry)))))
	       (return t))))))))



(defun modifier-test (fs1 fs2)
  (let (result fs0-list fs1-list fs2-list)
    (cond ((or-fs-p  fs1)
	   (setq fs1-list (cdr fs1)))
	  (t ; not an or
	   (setq fs1-list (list fs1))))
    (cond ((or-fs-p  fs2)
	   (setq fs2-list (cdr fs2)))
	  (t ; not an or
	   (setq fs2-list (list fs2))))
    (setq result
	  (remove-if-not #'allowed-modifier-p fs2-list))
    (when result  ;make all possible attachments
      (dolist (modifiee fs1-list)
	(dolist (modifier result)
	  (setf fs0-list
		(cons (append modifiee `((n-mod ,modifier)))
		      fs0-list)))))
    (cond ((second fs0-list)
	   (values (cons '*OR* fs0-list)))
	  (t ; only one result
	   (values  (car fs0-list))))))

(defun allowed-modifier-p (fs &aux valency)

  ; if one of these conditions holds then the f-structure is a legal modifier
  ;    the present participle formed from an intransitive verb
  ;    the past participle formed from a transitive verb (adjectival passive)
  ;    a noun that is unambiguously a noun (I'm not sure why I'm being so
  ;                                         restrictive?? - just check for
  ;                                         an underived noun??)
  (setf valency (second (assoc 'valency fs)))
  (when (and valency
	     (not (listp valency)))
    (setf valency `(,valency)))
  (or (and (equal (second (assoc 'nominalized fs)) '+)
	   (equal (second (assoc 'form fs)) 'prespart)
	   (member 'intrans valency :test #'equal))
      (and (equal (second (assoc 'passive fs)) '+)
	   (equal (second (assoc 'form fs)) 'pastpart)
	   (member 'trans valency :test #'equal))
      (and (equal (second (assoc 'cat fs)) 'n)
	   (not (assoc 'nominalized fs))
	   (= 1 (length (eng-dmk-entry (second (assoc 'root fs))))))))

;pjordan modified to lookup entry without -s on the end
(defun pos-not-in-dmk (root pos &optional (plural nil) (idiom nil))
  (let* ((all-dmk-entries (cond ((and (not plural)
				 (not idiom))
			    (or (eng-dmk-entry root)
				(eng-phrasal-dmk-entry root)))
			   ((and idiom
				 (not plural))
			    (eng-phrasal-dmk-entry (string-append "{idiom}"
								  root
								  "{/idiom}")))
			   ((and (not idiom)
				 plural)
			    (or (eng-dmk-entry
				 (subseq root 0 (- (length root) 1)))
				(eng-phrasal-dmk-entry
				 (subseq root 0 (- (length root) 1)))))
			   (t
			    (eng-phrasal-dmk-entry
				 (subseq (string-append "{idiom}"
							root
							"{/idiom}")
					 0 (- (length root) 1))))))
	 ;; remove all (CTE -) entries
	 (dmk-entries (remove-if #'(lambda (entry)
				     (assoc 'CTE (dmk-syn-features entry)))
				 all-dmk-entries)))
    (values (not (find-if #'(lambda (dmk-entry)
			      (eq (dmk-pos dmk-entry) pos))
			  dmk-entries)))))



;; 23-Jul-96 by EHN -- Obsolete.
;; 
;; (defun phrasal-score (fs)
;;   (let (result fs-list old-phrasal-score-slot)
;;     (cond ((or-fs-p fs)
;; 	   (setq fs-list (cdr fs)))
;; 	  (t		              ; not an or
;; 	   (setq fs-list (list fs))))
;; 
;;     (setq result
;; 	  (mapcar #'(lambda (fs) 
;; 		      (if (setq old-phrasal-score-slot
;; 				(assoc 'phrasal-score fs))
;; 			  (setq fs
;; 				(remove old-phrasal-score-slot fs
;; 					:test #'equal)))
;; 		      (append fs
;; 			      (list
;; 			       (list
;; 				'phrasal-score
;; 				;0
;; 				(calculate-phrasal-score fs)
;; 				))))
;; 		  fs-list))
;; 
;;     (setq result (stable-sort result #'less-than-by-phrasal-score))
;;     (cond (*phrasal-score-filter*
;; 	   ;; return most phrasal readings
;; 	   (setq result
;; 		 (remove-if-not #'(lambda (fs)
;; 				    (eq-phrasal-score-p (first result) fs))
;; 				result))
;; 	   (cond ((second result)
;; 		  (setq result (cons '*OR* result)))
;; 		 (t (setq result (car result))))
;; 	   (values result))
;; 	  ((second result)
;; 	  ;; don't filter
;; 	   (values (cons '*OR* result)))
;; 	  (t
;; 	   ;; only one result
;; 	   (values (car result))))))

;;----------------------------------------------------------------------
;; 2. Patches to grammar-callouts.lisp
;; 
;;  The function CALCULATE-PHRASAL-SCORE has been patched to make the
;;  treatment of *OR* the same as *MULTIPLE* (see code comments).
;; 

;; 23-Jul-96 by EHN -- Obsolete.
;; (defun calculate-phrasal-score (input-fs)
;;   (let ((result 0)
;; 	fs-list)
;;     (unless (and input-fs
;; 		 (listp input-fs)
;; 		 (second input-fs))
;;       (return-from calculate-phrasal-score 0))
;;     (cond ((eq (first input-fs) '*OR*)
;; 	   ;; 25-Sep-95 by EHN -- make this the same as multiple
;; 	   (setq fs-list (list (second input-fs)))
;; 	   ;;(setq fs-list (cdr input-fs))
;; 	   )
;; 	  ((eq (first input-fs) '*MULTIPLE*)
;; 	   (setq fs-list (cdr input-fs)))
;; 	  (t				; single fs
;; 	   (setq fs-list (list input-fs))))
;;       
;;     (dolist (fs fs-list) 
;;       (when (and fs
;; 		 (listp fs)
;; 		 (listp (first fs)))
;; 	(when  (assoc 'root fs) 
;; 	  (incf result))
;; 	(dolist (slot fs)
;; 	  (when (and slot
;; 		     (listp slot)
;; 		     (second slot)
;; 		     (listp (second slot)))
;; 	    (incf result (calculate-phrasal-score (second slot)))))))
;;     (values result)))
;; 
;; 
;; (defun eq-phrasal-score-p (fs1 fs2)
;;   (let* ((score1 (second (assoc 'phrasal-score fs1)))
;; 	 (score2 (second (assoc 'phrasal-score fs2))))
;;     (equal score1 score2)))
;; 	 
;; 
;; (defun less-than-by-phrasal-score (fs1 fs2)
;;   "is fs1 strictly less than fs2?"
;;   (let* ((score1 (second (assoc 'phrasal-score fs1)))
;; 	 (score2 (second (assoc 'phrasal-score fs2))))
;;     (cond ((and (null score1)
;; 		(null score2))
;; 	   nil)
;; 	  ((null score2)
;; 	   t)	  
;; 	  ((null score1)
;; 	   nil)
;; 	  ;; both numbers
;; 	  ((< score1 score2))))) 

;To find a Verb[pastpart] entry for a given string:
;Example:
;(let ((*tut-operator-style* :original))
;(tree-unify '((CAT V) (FORM PASTPART)) (parse-eng-word "powered")))
;==> ((ROOT "power") (ORTHO "powered") (CAT V) (VALENCY TRANS) (FORM PASTPART))

(defun find-transitivity (root)
  (let ((*tut-operator-style* :original)
	result)
    (cond ((stringp root)
	   (setq result (tree-unify '((CAT V) (FORM PASTPART)) (parse-eng-word root)))
	   (if (eq result *tut-fail-value*)
	    ;;then no verb entry
	    'none 
	    ;; else result is a verb entry
	    (let ()
	      (when (equal '*OR* (first result))
		(setq result (second result)))
	      (if (listp (setq result (second (assoc 'valency result))))
	          ;;if it is an '(*OR* TRANS INTRANS) call it a TRANS
	          'trans
		  ;;otherwise use whatever is there
		  result))))
	  (t 'none))))

(defun get-uninflected-v (root)
  (let ((*tut-operator-style* :original) result)
	    (setf result (tree-unify '((CAT V)) (parse-eng-word root)))
	    (if (eq result *tut-fail-value*)
	      nil
	      (if (eq '*or* (first result))
		  (second (assoc 'root (second result)))
		  (second (assoc 'root result))))))

	   
(defun mk-temp-v-fs (root)
  (list (list 'CAT 'V)
	(list 'ROOT (get-uninflected-v root))))
    
(defun past-form-p (root)
  (let ((*tut-operator-style* :original)
	result)
    (cond ((stringp root)
	   (setq result (tree-unify '((CAT V)) (parse-eng-word root)))
	   (if (eq result *tut-fail-value*)
	    ;;then no verb entry
	    nil 
	    ;; else result is a verb entry
	    (let ()
	      (if (eq '*OR* (first result))
		(setq result (cdr result))
		(setq result (list result)))
	      (dolist (reslt result nil)
		(setf reslt (second (assoc 'form reslt)))
		(when (listp reslt)
		  (setq reslt (first (or (member 'past (cdr reslt))
					  (member 'pastpart (cdr reslt))))))
		(when 
		    (or (eq 'past reslt)
			(eq 'pastpart reslt))
		  (return t)))))))))
		  

;;;------------------------------------------------------------------;
;;;
;;; get-next-token
;;;
;;; is "token" a global variable? Nope - I forgot to declare it -
;;; just added it!

(defun get-next-token (string &optional (delimiter #\Space))
  (let (token result token-end (char #\>)
	      max-len values)
    (setf result (string-left-trim " " string))
    (setf max-len (length result))
    (setf token-end
	  (do ((i 0 (incf i)))
	      ((or (char= delimiter char) (= i max-len)) (1- i))
	    (setf char (char result i))))
    (setf token (string-right-trim " "
				   (subseq result 0 token-end)))
    (setf result (string-left-trim `(,delimiter)
				   (subseq result token-end)))
    (when (string-equal token "*not*")
      (setf token
	    (do ()
		((string-equal token "*apostrophe*") `(*not* ,(append '(*or*) values)))
	      (multiple-value-setq (token result) (get-next-token result))
	      (when (not (string-equal token "*apostrophe*"))
		(push (read-from-string token) values)))))
    (when (not (= (1+ token-end) max-len))
      (values token result))))

;;;------------------------------------------------------------------;
;;;
;;; action-verb-p returns T if the string is a verb
;;; that has been categorized as being more active
;;; than stative.
;;;
;;; used to pick passive reading over --ed adjective.

(defun action-verb-p (string)
  (let ((dmk-entry (first (eng-dmk-entry-cat string 'V)))
	class)
    (when dmk-entry
      (setq class (dmk-class dmk-entry))
      (when class
	(string-in-string-p "AGENT" (symbol-name class))))))

;;;------------------------------------------------------------------;
;;;
;;; string-in-string-p returns T if the small string occurs inside
;;; the big string.

(defun string-in-string-p (smallstring bigstring)
  (let ((big-len (length bigstring))
	(small-len (length smallstring)))
    (dotimes (i (1+ (- big-len small-len)))
      (if (string= bigstring smallstring :start1 i :end1 (+ i small-len))
	  (return-from string-in-string-p T)))
    ;; we've been through entire bigstring
    (values NIL)))

;;;------------------------------------------------------------------;
;;;
;;; word-fs-ed-form-p returns T for an -ed string that could
;;; be seen as the past participle for a verb.

(defun word-fs-ed-form-p (fs)
  (unless (listp fs)
    (return-from word-fs-ed-form-p NIL))
  (let (root-slot root-value root-len ortho-slot ortho-value ortho-len entries)
    (or
     ;; use ortho
     (and
      (setq ortho-slot (assoc 'ortho fs))
      (listp ortho-slot)
      (setq ortho-value (second ortho-slot))
      (stringp ortho-value)
      (setq ortho-len (length ortho-value))
      (> ortho-len 4)
      (string= ortho-value "ed" :start1 (- ortho-len 2))
      ;;modified by pjordan - have to allow for cases where e gets dropped
      ;;when forming past participle e.g. discharged -> discharge
      ;;and for doubling of consonants e.g. cancelled -> cancel
      (or (setq entries (eng-dmk-entry-cat (subseq ortho-value 0 (- ortho-len 2)) 'V))
	  (setq entries (eng-dmk-entry-cat (subseq ortho-value 0 (- ortho-len 3)) 'V))
	  (setq entries (eng-dmk-entry-cat (subseq ortho-value 0 (- ortho-len 1)) 'V)))

      ;;sometimes there is an entry which is an abbrev. (eg. "sw"
      ;;entry must handle for tests on "swing")

      (dolist (entry entries nil)
	(when 
	    (not (eq '+ (second (assoc 'abb (dmk-syn-features entry)))))
	  (return t))))

     ;; use root
     (and 
      (setq root-slot (assoc 'root fs))
      (listp root-slot)
      (setq root-value (second root-slot))
      (stringp root-value)	   
      (setq root-len (length root-value))
      (> root-len 4)			;pjordan changed from 3 to 4 for cases like "ring"
      (string= root-value "ed" :start1 (- root-len 2))
      (or (setq entries (eng-dmk-entry-cat (subseq root-value 0 (- root-len 2)) 'v))
	  (setq entries (eng-dmk-entry-cat (subseq root-value 0 (- root-len 3)) 'v))
	  (setq entries (eng-dmk-entry-cat (subseq root-value 0 (- root-len 1)) 'v)))
	   
      ;;sometimes there is an entry which is an abbrev. (eg. "sw"
      ;;entry must handle for tests on "swing")

      (dolist (entry entries nil)
	(when 
	    (not (eq '+ (second (assoc 'abb (dmk-syn-features entry)))))
	  (return t)))))
  ))


;;;==================================================================;

;;; Analysis of SGML tags with internal attributes
;;;
;;; Example of use in rule: 
;;; (<title-opener> <-- (%)
;;;      (((x0 attr) <= (get-internal-attributes (x1 value) "TITLE"))
;;;       (*test* (x0 attr))
;;;       ((x0 cat) = title)
;;;       ((x0 root) = title)))
;;;
;;; Example wildcard value: {TITLE_ID=_*DQ*_FOO_*DQ*_}

(defun match-tag-name (object tagname)
  "Returns T iff OBJECT is a symbol of the form {TAGNAME} or {TAGNAME_...}."
  (let* ((string (string object))
	 (len (length string))
	 (tag-len (length tagname)))
    (and string
	 ;; Length OK?
	 (>= len (+ tag-len 2))
	 ;; Boundaries OK?
	 (char= (schar string 0) #\{)
	 (char= (schar string (1- len)) #\})
	 ;; If OBJECT contains more than TAGNAME, is there a '_' at the end of
	 ;; the name?
	 (or (= len (+ tag-len 2))
	     (char= (schar string (1+ tag-len)) #\_))
	 ;; Names match?
	 (string-equal string tagname :start1 1 :end1 (1+ tag-len)))))


(defun get-tag-attribute (object)
  "Returns any internal attribute in OBJECT as a string."
  (let* ((string (string object))
	 (len (length string))
	 (start (position #\_ string))
	 result)
    (and start
	 ;; Tag has attributes -- extract them
	 (setf
	  ;; Skip over tag name
	  result (nstring-downcase (subseq string (1+ start) (1- len)))
	  ;; Canonicalize punctuation and spacing
	  result (replace-string "\"" "_*dq*_" result)
	  result (nsubstitute #\space #\_ result)
	  result (string-trim '(#\space) result))
	 (plusp (length result))
	 result)))


;;;==================================================================;

;;;

(defun class-p (concept
		&optional (classes '(&P-SIZE &P-TIME &P-MEASURE &P-UNCLASSIFIED)))
  "Returns T iff CONCEPT belongs to any of CLASSES."
  (declare (ignore concept classes))
  ;; (domo-some-is-a-p concept classes)
  T
  )

;;;==================================================================;

;;; This function parses the PI tag and creates an f-structure by pulling
;;; out the relevant information in the tag

(defun parse-cte-tag (object)
  (let* ((string (string object))
	 (len (length string))
	 result ortho sem head modi val dump head-pos stem result eval)
    (when (and string
	       (stringp string)
	       ;; new, old, string
	       ;; 10-Jun-96 by EHN -- fix bug when *percent* is a head.
	       (setf string (replace-string "%%PERCENT%%" "*PERCENT*" string))
	       (setf string (replace-string "_" "__" string))	       
	       (setf string (replace-string " " "_" string))
	       (multiple-value-setq (val result)
		 (get-next-token string #\{))
	       (multiple-value-setq (val result)
		 (get-next-token result))
	       (string-equal val "?CTE"))
      (cond ((string-equal (multiple-value-setq (val result) (get-next-token result))
			   "means")
	     (dotimes (i 2)
	       (cond ((string-equal (multiple-value-setq (val result)
				      (get-next-token result #\=))
				    "text")
		      (multiple-value-setq (dump result) (get-next-token result))
		      (multiple-value-setq (ortho result) (get-next-token result #\*))
		      (multiple-value-setq (dump result) (get-next-token result)))
		     ((string-equal val "val")
		      (multiple-value-setq (dump result) (get-next-token result))
		      (multiple-value-setq (sem result) (get-next-token result))
		      (multiple-value-setq (dump result) (get-next-token result))))))

	    ((string-equal val "eval")
	     (multiple-value-setq (dump result) (get-next-token result #\{))
	     (multiple-value-setq (eval result) (get-next-token result #\}))
	     )	    
			    
	    ((string-equal val "attach")
	     (dotimes (i 3)
	       (cond ((string-equal (multiple-value-setq (val result)
				      (get-next-token result #\=))
				    "head")
		      (multiple-value-setq (dump result) (get-next-token result)) 
		      (multiple-value-setq (head result) (get-next-token result #\*))
		      (multiple-value-setq (dump result) (get-next-token result)))
		     ((string-equal val "head-pos")
		      (multiple-value-setq (dump result) (get-next-token result))
		      (multiple-value-setq (head-pos result) (get-next-token result #\*))
		      (multiple-value-setq (dump result) (get-next-token result)))
		     ((string-equal val "modi")
		      (multiple-value-setq (dump result) (get-next-token result))
		      (multiple-value-setq (modi result) (get-next-token result #\*))
		      (multiple-value-setq (dump result) (get-next-token result)))
		     (t nil))))))

    ;; format the appropriate f-structures
    (cond (sem
	   (if (stringp sem)
	       `((cat cte-means)
		 (ortho ,(string-downcase ortho))
		 (sem ,(read-from-string sem)))
	     `((cat cte-means)
	       (ortho ,(string-downcase ortho))
	       (sem ,sem))))
	  (eval
	   `((cat cte-eval)
		 (eval ,(string-downcase eval))))
	  (head
	   ;;; the structural PI tag is position dependent but since
	   ;;; the location of attachment sites is unknown apriori and could
	   ;;; be in any order, the position information in the PI tag
	   ;;; is computed so that no new structural PI tags are included
	   ;;; in the position information.  This is handled by the 
	   ;;; disambiguator by presenting structural ambiguities in a
	   ;;; right to left order.  Then when these tags are parsed here
	   ;;; the location of each tag is recorded while going left to
	   ;;; right during the parse and the position information
	   ;;; is adjusted to include the preceding PI tags via the function
	   ;;; tag-count.  Positions of structural PI tags is recorded
	   ;;; in the hash table *cte-tag-counter*

	   ;; 31-Aug-94 by EHN - fix LABEL problem
	   ;;  7-Sep-94 by EHN - extend to deal with "-TAG" mark.
	   ;; 7/20/95-igo: need to change references to *disambiguator-non-string-roots*
	   ;; since this var will be no longer supported in the disambiguator.
	   (when (stringp head)
					;(format t "~%~%HEAD: ~s" head)
	     ;; 10-Jun-96 by EHN -- fix bug when *percent* is a head.
	     (setf head (replace-string "*PERCENT*" "%%PERCENT%%" head))
	     (if (and (> (length head) 4)
		      (equal "-TAG" (substring head -4))
		      (setq stem (substring head 0 -5))
		      (find stem *disambiguator-non-string-roots*
			    :key #'symbol-name :test #'equal))
		 (setq head (read-from-string stem))
	       (setq head (string-downcase head))))
		   
	   (setf result
		 (cond ((and (or (stringp head)
				 (find head *disambiguator-non-string-roots*))
			     head-pos)
			`((cat cte-attach)
			  (head ,head)
			  (head-pos ,(tag-count (read-from-string head-pos)))
			  (modi ,(string-downcase modi))))
		       ((or (stringp head)
			    (find head *disambiguator-non-string-roots*))
			`((cat cte-attach)
			  (head ,head)
			  (modi ,(string-downcase modi))))
		       (head-pos
			`((cat cte-attach)
			  (head ,head)
			  (head-pos ,(read-from-string head-pos))
			  (modi ,(string-downcase modi))))
		       (t
			`((cat cte-attach)
			  (head ,head)
			  (modi ,(string-downcase modi))))))
	   (setf (gethash *token-position* *cte-tag-counter*) 1)
	   result))))

;;; count up the structural PI tags to the left of the token of interest
;;; let's me normal and count left to right and adjust the ending condition
;;; as structural tags are found.

(defvar *debug-tag-counts* nil)

(defun tag-count (token-no)
  (cond (*debug-tag-counts*
	 (format t "~%~%Enter the real count for tag, token-no ~s: " token-no)
	 (read))
	(t
	 (let ((token-inc 0)(result 0) incr)
	   (do ((i 0 (incf i)))
	       ;;  3-Sep-94 by EHN -- added "1" to the call to + below.
	       ;; This was missing some tokens that it needed to count.
	       ((= i (+ token-no token-inc 1)))
	     (setf incr (or (gethash i *cte-tag-counter*)
			    0))
	     ;; (format t "~%[TAG-COUNT] Token: ~s Increment: ~s" i incr)
	     (setf token-inc (+ token-inc incr)))
	   (setf result (+ token-no token-inc))
	   ;; this is to account for rightward attachments
	   ;;(print *token-position*)	   
	   (when (>= result *token-position*)
	     (incf result))
	   result))))

;;;==================================================================;


;; ----------------------------------------------------------------------
;; New grammar callout for verbatim tags
;;
;; Given: indices into *parser-token-list* for tokens in a verbatim constituent
;; Return: string containing the original surface for those tokens
;;
;; Check for: strictly-contiguous token ranges; token out-of-range.
;;
;; Example:
;;
;; > (test-sentence "<code>remove the engine from housing.</code>")
;; 1
;; > (get-original-tokens '(2 3 4 5 6 7))
;; "remove the engine from housing."

;; 15-Feb-96 by EHN

(defmacro token-fetch (token slot)
  `(second (assoc ,slot (nth (1- ,token) *tokval*))))

(defvar *tokval*) ;; tokenizer global; only external ref.

;; 12-Jul-96 by EHN -- Added a new call to remove IDIOM tags
;; from the tokenlist, since they aren't part of the original
;; input and were causing problems (since they don't have
;; START, END, etc. in *TOKVAL*).

(defun get-original-tokens (toklist)
  (cond ((and toklist
	      (listp toklist)
	      (every #'numberp toklist)
	      (setq toklist (sort toklist #'<))
	      (check-for-contiguous toklist)
	      (setq toklist (remove-idiom-toklist-entries toklist))
	      )
	 (get-original-tokens-1 toklist))
	(t (error "GET-ORIGINAL-TOKENS: Improper token range: ~a" toklist))))

;; 12-Jul-96 by EHN -- New function, see comments on GET-ORIGINAL-TOKENS.

(defun remove-idiom-toklist-entries (toklist)
  (remove-if #'(lambda (x)
		 (or
		  (eq '{IDIOM} (token-fetch x 'partok))
		  (eq '{/IDIOM} (token-fetch x 'partok))))
	     toklist))

(defun get-original-tokens-1 (tokens)
  (let* ((acceptable (cons 1 (length *tokval*)))
	 (current (pop tokens))
	 (curend (token-fetch current 'end))
	 (curstring (token-fetch current 'rawtok))
	 newstart newstring newend padsize padding)
    ;; 12-Jul-96 by EHN -- Added a new error check to catch problems where
    ;; badly formed tokens sneak in (wasn't catching IDIOM tags left in the
    ;; token list.
    (unless (and current curend curstring)
	    (error "Null value in CURRENT CUREND CURSTRING: ~s ~s ~s" current curend curstring))
    (dolist (new tokens curstring)
      (when (or (< new (first acceptable))
		(> new (rest acceptable)))
	(error "GET-ORIGINAL-TOKENS: Token value out of range of *TOKVAL*: ~a" new))
      (setq newstart (token-fetch new 'start))
      (setq newend (token-fetch new 'end))
      (setq newstring (token-fetch new 'rawtok))
      (setq padsize (1- (- newstart curend)))
      (if (> padsize 0)
	  (setq padding (make-string padsize :initial-element #\Space))
	(setq padding ""))
      (setq curstring (concatenate 'string curstring padding newstring))
      (setq curend newend))))

(defun check-for-contiguous (tokens)
  (let ((current (pop tokens)))
    (dolist (token tokens t)
      (when (not (= current (1- token)))
	  (return nil))
      (setq current token))))
	 
