;;;==================================================================;
;;; -*- Syntax: Common-Lisp; Package: USER; Base: 10; Mode: LISP -*-
;;;==================================================================;
;;;
;;;               Center for Machine Translation
;;;                 Carnegie-Mellon University
;;;                                                                       
;;;------------------------------------------------------------------;
;;;                                                                       
;;; Copyright (c) 1995
;;; Carnegie Mellon University. All Rights Reserved.                      
;;;                                                                       
;;;------------------------------------------------------------------;
;;;
;;;          File: morph-analyzer.lisp
;;;  File created:  6-May-92 by amf
;;;        Author: Masaru Tomita
;;;    Maintainer: Alex Franz [amf@cs.cmu.edu]
;;;                Nicholas Brownlow [ndb@cs.cmu.edu]
;;; Last Modified:  21-Nov-95, 13:32-Jun-95 at 19:05
;;;
;;;------------------------------------------------------------------;
;;; Description                                                      
;;;
;;; English morphological analysis code.
;;; 
;;; The parser calls two functions: 
;;;
;;; (parse-number) and (parse-eng-word)
;;; 
;;; From eng-morph.lisp, misc-patches.lisp.


;;;==================================================================;
;;; Change Log
;;;
;;;  6-May-92 by amf: started changing this code
;;;                   -- new DMK
;;;                   -- TUT unification
;;;                   -- add misc.lisp, misc-patches.lisp material
;;;
;;;  1-Oct-93 by amf: fixed suffix-to-form-fs (SEM feature action)


;;;==================================================================;
;;; Package Statements

(in-package :user)


;;;==================================================================;

;;; Global Variables

;;; From other modules
(proclaim '(special *cache-english-words-p*))

;; this is set in checker-functions.lisp
(defvar *analyzer-show-warnings* nil)
(defvar *morph-trace* nil)
(defvar *analyzer-tag-warning-p* nil)

;;; these items are not looked up in dictionary
;;;(defvar *lexical-rule-items*
;;;  '("s" "*period*" "*comma*" "[" "]" "{" "}"
;;;    "(" ")" "*dq*" "order" "*colon*" "*hyphen*" "not" "*apostrophe*" "*bang*" "*percent*"
;;;    "&ldquo@" "&Rdquo@" "&lSquo@" "&rsquo@"
;;;    ))


(defvar *lexical-rule-items*
  '("*period*" "*comma*" "*dq*" "*colon*" "*apostrophe*" "*bang*" "*percent*"
    "&ldquo@" "&Rdquo@" "&lSquo@" "&rsquo@" ))

;;; NOTE: We use the list of SGML tags from sgml.lisp
;;; ...not any more, though


(defvar *sgml-tags-with-curlies* nil)
(defvar *sgml-tags-strings* nil)

(setq *sgml-tags-with-curlies*
      (mapcar #'(lambda (x)
		  (format nil "{~A}" x))
	      *sgml-tags-strings*))
			

;;;==================================================================;

;;; TUT-related Functions

;;; TUT-LIST-TO-PSEUDO-VALUE converts a list of values into a  value,
;;; while also changing (FORM (:OR ...)) to (FORM (*OR* ...)) 
;;; (a b c) ==> (*OR* a b c)
;;; (a)     ==>  a
;;;

(defun tut-list-to-pseudo-value (value-list)
  (cond ((null value-list) nil)
	((eq (length value-list) 1)
	 (change-tut-or-to-pseudo-or (car value-list)))
	(t
	 (cons '*OR* (mapcar #'change-tut-or-to-pseudo-or value-list)))))


;;;------------------------------------------------------------------;
;;; Function CHANGE-TUT-OR-TO-PSEUDO-OR
;;; 
;;; Given an f-structure, we very carefully look for a 
;;; :OR and change it to *OR* 

(defun change-tut-or-to-pseudo-or (fs)
  (mapcan #'(lambda (slot)
	      (cond ((and (listp slot)
			  (listp (second slot))
			  (second slot) ; not NIL
			  (or (eq (car (second slot)) :OR)
			      (eq (car (second slot)) '*OR*)))
		     (list (cons (car slot) (list (cons '*OR* (cdr (second slot)))))))
		    
		    ((and (listp slot)
			  (eq (first slot) 'NUMBER)
			  (listp (second slot))
			  (second slot) ; not NIL
			  (> (length (second slot)) 1)
			  (eq (second (second slot)) 'PL))
		     (list (list 'NUMBER 'SG)))

		    ((and (listp slot)
			  (eq (first slot) 'NUMBER)
			  (listp (second slot))
			  (second slot) ; not NIL
			  (> (length (second slot)) 1)
			  (eq (second (second slot)) 'SG))
		     (list (list 'NUMBER 'PL)))		    
		    
		    ((and (listp slot)
			  (listp (second slot))
			  (second slot) ; not NIL
			  (eq :NOT (car (second slot))))
		     NIL)
		    (t ; all normal slots
		     (list slot))))
	  fs))

(defun change-pseudo-or-to-tut-or (fs)
  (mapcan #'(lambda (slot)
	      (cond ((and (listp slot)
			  (listp (second slot))
			  (second slot) ; not NIL
			  (eq (car (second slot)) '*OR*))
		     (list (cons (car slot) (list (cons ':OR (cdr (second slot)))))))
		    (t ; all normal slots
		     (list slot))))
	  fs))


;;;==================================================================;

;;; Number parsing
;;;
;;; Numbers must be of the following form:
;;;
;;; <number>   ::= [ <sign> ] ( <decimal> | <fraction> )
;;; <sign>     ::= ( "+" | "-" )
;;; <decimal>  ::= <digit>+ [ "." ]
;;; <decimal>  ::= <digit>* "." <digit>+
;;; <fraction> ::= <digit>+ "/" <digit>+

(defun make-number-fs (cat sign comma whole-part fract-part)
  "Makes an FS for a number of category CAT with optional SIGN, maybe a COMMA,
WHOLE-PART, and optional FRACT-PART."
  ;; Used to add *token-position* here; now the grammar rule does it.
  (nconc (list (list 'CAT cat)
	       (list 'ROOT cat))
	 (and sign
	      (list (list 'LEADING-SIGN sign)))
	 (and comma
	      (list (list 'PUNCTUATED-NUMBER '+)))
	 (case cat
	   (NUMBER
	    (nconc
	     (list (list 'INTEGER whole-part))
	     (and fract-part
		  (list (list 'DECIMAL fract-part)))))
	   (FRACTION
	    (list (list 'NUMER (nconc
				(list (list 'CAT 'NUMBER)
				      (list 'ROOT 'NUMBER)
				      (list 'INTEGER whole-part))))

		  (list 'DENOM (list (list 'CAT 'NUMBER)
				     (list 'ROOT 'NUMBER)
				     (list 'INTEGER fract-part))))))))

(defun parse-number (object)
  "Parses OBJECT as a number, returning f-structure if any."
  (let (string
	char
	(start 0)
	(comma nil)
	(state :START)
	(cat nil)
	(sign nil)
	(whole-part nil)
	(fract-part nil))
    ;; Set up some chunks of code to paste into the state machine below
    (macrolet ((number-start () '(setf cat 'NUMBER start i))
	       (fraction-start () '(setf cat 'FRACTION start i))
	       (comma-set () '(setf comma i))
	       (comma-ok () '(or (not comma) (= comma (- i 4))))
	       (get-whole-part () '(setf whole-part (delete #\, (subseq string start i))))
	       (get-fract-part () '(setf fract-part (subseq string start i))))
      (and (setf string (typecase object
			  (symbol (symbol-name object))
			  (string object)
			  (otherwise nil)))
	   (dotimes (i (1+ (length string)) t)
	     ;; Iterate over string characters + null terminator
	     (setf char (and (< i (length string))
			     (schar string i)))
	     ;; (format t "~S ~S ~S~%" i state char)
	     (setf state (case state
			   (:START
			    (cond ((null char) :FAIL)
				  ((char= char #\+) (setf sign "+") :SIGN)
				  ((char= char #\-) (setf sign "-") :SIGN)
				  ((char= char #\.) (setf whole-part "0") :PERIOD)
				  ((digit-char-p char) (number-start) :WHOLE)
				  (t :FAIL)))
			   (:SIGN
			    (cond ((null char) :FAIL)
				  ((char= char #\.) (setf whole-part "0") :PERIOD)
				  ((digit-char-p char) (number-start) :WHOLE)
				  (t :FAIL)))
			   (:WHOLE
			    (cond ((null char) (if (comma-ok)
						   (progn (get-whole-part) :SUCCEED)
						 :FAIL))
				  ((char= char #\.) (if (comma-ok)
							(progn (get-whole-part) :PERIOD)
						      :FAIL))
				  ((char= char #\/) (if comma
							:FAIL
						      (progn (get-whole-part) :SLASH)))
				  ((char= char #\,) (if (comma-ok)
							(progn (comma-set) :WHOLE)
						      :FAIL))
				  ((digit-char-p char) :WHOLE)
				  (t :FAIL)))
			   (:PERIOD
			    (cond ((null char) (if cat :SUCCEED :FAIL))
				  ((digit-char-p char) (number-start) :FRACT)
				  (t :FAIL)))
			   (:SLASH
			    (cond ((null char) :FAIL)
				  ((digit-char-p char) (fraction-start) :FRACT)
				  (t :FAIL)))
			   (:FRACT
			    (cond ((null char) (get-fract-part) :SUCCEED)
				  ((digit-char-p char) :FRACT)
				  (t :FAIL)))))
	     (when (eq state :FAIL)
	       (return nil)))
	   ;; We successfully parsed.  Now build the f-structure.
	   (make-number-fs cat sign comma whole-part fract-part)))))


;;;------------------------------------------------------------------;

(defparameter *test-parse-number*
  '(("1" t) (".2" t) ("1." t) ("1.2" t)
    ("/2" nil) ("1/" nil) ("1/2" t)
    ("-1" t) ("-.2" t) ("-1." t) ("-1.2" t)
    ("-/2" nil) ("-1/" nil) ("-1/2" t)
    ("--1" nil) ("1/-1" nil) ("." nil) ("1/1.1" nil) ("" nil)))

(defun test-parse-number (&key (tests *test-parse-number*)
			       (verbose t))
  (let (result)
    (dolist (test tests)
      (setf result (parse-number (make-symbol (first test))))
      (format t "~&~S ~A~%" (first test)
	      (if (second test)
		  (if result
		      "parsed as expected"
		    "FAILED UNEXPECTEDLY")
		(if result
		    "PARSED UNEXPECTEDLY"
		    "failed as expected")))
      (when verbose
	(pns result :newline nil))))) 


;;;==================================================================;

;;; Function PARSE-ENG-WORD
;;; 
;;;  PARSE-ENG-WORD takes a word in any form, analyzes its morphology,
;;;  looks up the dictionary, and returns a f-structure.
;;;
;;; Some examples:
;;;
;;; 1 Enter PARSE-ENG-WORD "angle"
;;; 1 Exit PARSE-ENG-WORD (*OR* ((ROOT "angle") (VALENCY TRANS) (CAT V)) ((ROOT "angle") (CAT N) (COUNT +))) :PARSED
;;;
;;; 1 Enter PARSE-ENG-WORD "the"
;;; 1 Exit PARSE-ENG-WORD ((ROOT "the") (CAT DET)) :PARSED
;;;
;;; 1 Enter PARSE-ENG-WORD "is"
;;; 1 Exit PARSE-ENG-WORD ((ROOT "be") (FORM IS) (CAT V)) :IRREGULAR
;;;
;;; 1 Enter PARSE-ENG-WORD "*period*"
;;; 1 Exit PARSE-ENG-WORD NIL :LEXRULE
;;;

(defun parse-eng-word (object)
  "OBJECT is a symbol or string.  PARSE-ENG-WORD analyzes OBJECT's morphology,
looks it up in the dictionary, and returns any resulting f-structure."
  (let ((result-fs nil)
	(string (typecase object
		  (symbol (string-downcase (symbol-name object)))
		  (string object)
		  (otherwise nil))))
    (and string
	 (cond
	  ;; commas etc. may be in the lexicon, so look them up
	  ;; ((member string *lexical-rule-items* :test #'string-equal)
	  ;; (values nil :lexrule))
	  ;; SGML tags may now be in the lexicon, so look them up
	  ;; ((member string *sgml-tags-with-curlies* :test #'string-equal)
	  ;; (values nil :sgml-tag))
	  ((and *use-word-fs-cache*
		(setq result-fs (word-fs-cache-get string)))
	   (values result-fs :CACHED))
	  (t				; need to analyze word
	   ;; First, get irregular forms if any
	   (setq result-fs (append (eng-irregular-word-fs string)
				   result-fs))
	   ;; Second, get regular forms if any
	   (setq result-fs (append (eng-inflected-word-to-fs string)
				   result-fs))
	   ;; Complain if we don't know the word
	   (if (null result-fs) 
	       (complain-unknown-word string))
	   ;; Turn result into proper f-structure
	   (setq result-fs (tut-list-to-pseudo-value result-fs))
	   ;; Cache the analysis for this word
	   (if *use-word-fs-cache*
	       (word-fs-cache-add string result-fs))
	   ;; Used to add *token-position* here; now the grammar rule does it.
	   (values (copy-tree result-fs) :PARSED))))))

(defun complain-unknown-word (string)
  (when *analyzer-show-warnings*
    (cond ((and (not *analyzer-tag-warning-p*)
		(char= #\{ (char string 0)))) ; it's a tag - do nothing
	  (t 
	   (format *standard-output* "~&[Parser] WARNING: Word ~S not in DMK~%" string)))))


;;;==================================================================;

;;; Function ENG-INFLECTED-WORD-TO-FS
;;; 
;;; Given a possibly inflected word, we find all possible
;;; morphological analyses, and return them as a list
;;; of f-structures.

;;; Input: "plays"
;;; Ouptut: (((cat N) (root "play") (form plural))
;;;          ((cat V) (root "play") (form present3sg)))
;;----------------------------------------------------------------------
;; 29-Jul-96 by EHN -- make this fn pass any additional values
;; returned by PARSE-ENG-MORPH.
;;
(defun eng-inflected-word-to-fs (word)
  (let ((root+suffixes (parse-eng-morph-string word)))
    (mapcan
     #'(lambda (root+suffix)
	 (let* ((root (first root+suffix))
		(suffix (second root+suffix))
		;; 29-Jul-96 by EHN -- save anything else, like +DUBL
		(other-feats (cddr root+suffix))
		(dmk-structs (eng-dmk-entry root)))
	   
	   (case suffix
	     (+ER
	      (append 
	       (suffix-to-form-fs word root 'ADJ dmk-structs '(FORM COMP) other-feats suffix)
	       (suffix-to-form-fs word root 'ADV dmk-structs '(FORM COMP) other-feats suffix)))
	     (+EST
	      (append 
	       (suffix-to-form-fs word root 'ADJ dmk-structs '(FORM SUPER) other-feats suffix)
	       (suffix-to-form-fs word root 'ADV dmk-structs '(FORM SUPER) other-feats suffix)))
	     (+ING
	      (suffix-to-form-fs word root 'V dmk-structs '(FORM PRESPART) other-feats suffix))
	     (+INGS
	      (suffix-to-form-fs word root 'V dmk-structs '(FORM PRESPART-PL) other-feats suffix)) ; paintings 
	     (+ED 
	      (suffix-to-form-fs word root 'V dmk-structs '(FORM (:OR PAST PASTPART)) other-feats suffix))
	     (+S 
	      (append
	       (suffix-to-form-fs word root 'UNIT dmk-structs '(NUMBER PL) other-feats suffix)
	       (suffix-to-form-fs word root 'PROP dmk-structs '(NUMBER PL) other-feats suffix)	       	       
	       (suffix-to-form-fs word root 'V dmk-structs '(FORM PRESENT3SG) other-feats suffix)
	       (suffix-to-form-fs word root 'N dmk-structs '(NUMBER PL) other-feats suffix)))
	     (+NIL
	      (suffix-to-form-fs word root NIL dmk-structs NIL other-feats suffix)))))
     
     root+suffixes)))

#|
(defun eng-inflected-word-to-fs (word)
  (let ((root+suffixes (parse-eng-morph-string word)))
    (mapcan
     #'(lambda (root+suffix)
	 (let* ((root (first root+suffix))
		(suffix (second root+suffix))
		(dmk-structs (eng-dmk-entry root)))
	   
	   (case suffix
	     (+ER
	      (append 
	       (suffix-to-form-fs word root 'ADJ dmk-structs '(FORM COMP))
	       (suffix-to-form-fs word root 'ADV dmk-structs '(FORM COMP))))
	     (+EST
	      (append 
	       (suffix-to-form-fs word root 'ADJ dmk-structs '(FORM SUPER))
	       (suffix-to-form-fs word root 'ADV dmk-structs '(FORM SUPER))))
	     (+ING
	      (suffix-to-form-fs word root 'V dmk-structs '(FORM PRESPART)))
	     (+INGS
	      (suffix-to-form-fs word root 'V dmk-structs '(FORM PRESPART-PL))) ; paintings 
	     (+ED 
	      (suffix-to-form-fs word root 'V dmk-structs '(FORM (:OR PAST PASTPART))))
	     (+S 
	      (append
	       (suffix-to-form-fs word root 'UNIT dmk-structs '(NUMBER PL))
	       (suffix-to-form-fs word root 'PROP dmk-structs '(NUMBER PL))	       	       
	       (suffix-to-form-fs word root 'V dmk-structs '(FORM PRESENT3SG))
	       (suffix-to-form-fs word root 'N dmk-structs '(NUMBER PL))))
	     (+NIL
	      (suffix-to-form-fs word root NIL dmk-structs NIL)))))
     
     root+suffixes))) ; mapcan over this list
|#


;;;==================================================================;

;;; Function SUFFIX-TO-FORM-FS
;;;
;;; This function determines whether a given root+form combination
;;; is valid according to the dictionary.
;;;
;;; Input: surface string, root string, cat, dmk lexicon entries, form value
;;;
;;; Output: complete f-structure for word with that form, or NIL

;;; Procedure:
;;;
;;; 1. get all dict entries with correct category
;;; 2. if no dict entries match, return nil
;;; 3. make list of word f-structures from remaining dmk-structs
;;;    using SYN-FEATURES from dmk entry
;;;          FORM (could be NIL), ROOT, ORTHO from local vars
;;; 4. Unify that list against our FORM value
;;; 5. return resulting list

;; ----------------------------------------------------------------------
;; 10. Patch to morph-analyzer.lisp
;;
;; Fix this so that it prunes extra SEMs at the root -- when the
;; word is first analyzed and its dmk-concept field is accessed.
;; If this works, we can take the patch out of the disambiguator,
;; since that patch doesn't work when a PP attaches to the lexambig
;; item.

;;----------------------------------------------------------------------
;; 29-Jul-96 by EHN -- make this thing smart about +DUBL coming in 
;; OTHER-FEATS variable. Add this var, make it optional in case the above
;; fn doesn't contain all the calls to this fn.
;;
;; 31-Jul-96 by EHN -- add SUFFIX arg.

(defun suffix-to-form-fs (surface root cat dmk-structs features &optional (other-feats nil) (suffix nil))
  (let ((result dmk-structs)
	(surface-different-from-root (if (not (string= root surface))
					 (list (list 'ortho surface)) ; then
					 nil)) ; else
	)
    ;; find all dmk entries with right category, if given
    (if cat
	(setq result
	      (mapcan #'(lambda (dmk-struct)
			  (if (eq (dmk-pos dmk-struct)
				  cat)
			      (list dmk-struct))) ; then keep it
		      result)))

    ;; 29-Jul-96 by EHN -- check for SYL-DOUBLE
    (setq result
	  (mapcan #'(lambda (dmk-struct)
		      
		      (cond
		       ((or
			   ;; 31-Jul-96 by EHN -- if we saw +DUBL, then only pass this reading
			   ;; if the DMK entry calls for SYL-DOUBLE.
			   (and (member '+DUBL other-feats)
				(eq '+ (second (assoc 'syl-double (dmk-syn-features dmk-struct)))))
			   ;; 31-Jul-96 by EHN -- if we didn't see +DUBL, then only pass this reading
			   ;; if we don't have a suffix that requires doubling (+ING, +ED) and a dmk
			   ;; entry that calls for SYL-DOUBLE.
			   (and (not (member '+DUBL other-feats))
				(not (member suffix '(+ING +ED +ER +EST +ING +INGS)))
				(eq '+ (second (assoc 'syl-double (dmk-syn-features dmk-struct)))))
			   ;; 31-Jul-96 by EHN -- fall through.
			   (and (not (member '+DUBL other-feats))
				(not (eq '+ (second (assoc 'syl-double (dmk-syn-features dmk-struct)))))))
			(when *morph-trace*
			      (format t "~%MORPH: Suffix: ~s Other-Feats: ~s Syl-Double: ~s"
				      suffix other-feats 
				      (eq '+ (second (assoc 'syl-double (dmk-syn-features dmk-struct)))))
			      (format t "~%MORPH: Keeping this struct: ~s" (dmk-concept dmk-struct)))
			(list dmk-struct))
		       (t 
			(when *morph-trace*
			      (format t "~%MORPH: Suffix: ~s Other-Feats: ~s Syl-Double: ~s"
				      suffix other-feats 
				      (eq '+ (second (assoc 'syl-double (dmk-syn-features dmk-struct)))))
			      (format t "~%MORPH: Pruning this struct: ~s" (dmk-concept dmk-struct)))
			nil)))
		  result))

    ;; make list of word f-structures from remaining dmk entries
    ;; that unify against local FORM value
    (setq result
	  (mapcan #'(lambda (dmk-struct)
		      (let (result-fs)
			(cond (features ; we must have certain features
			       (let ((*tut-operator-style* :keyword)
				     (num-dmk (assoc 'NUMBER (dmk-syn-features dmk-struct)))
				     (num-ana (assoc 'NUMBER (list features))))
				 (setq result-fs
				       (tree-unify
					(copy-tree (dmk-syn-features dmk-struct))
					(list features)))
				 ;; if both dmk and analysis have (NUMBER PL), then
				 ;; fail this reading
				 (if (and num-dmk num-ana)
				     (setq result-fs *tut-fail-value*))
				 (if (eq *tut-fail-value* result-fs)
				     (setq result-fs NIL) ; then
				     (setq result-fs 
					   (list (cons (list 'CAT (dmk-pos dmk-struct))
						       result-fs))))))
			      (t  ; no prescribed features
			       (setq result-fs 
				     (list (cons (list 'CAT (dmk-pos dmk-struct))
						 (copy-tree (dmk-syn-features dmk-struct)))))))
			;; 28-Sep-93 by pjordan modified to add concept to f-structure
			;; (terpri) (pprint result-fs) (terpri)
			(if (and result-fs (dmk-concept dmk-struct))
			    (list (append (car result-fs)
					  (list (list 'SEM ;;(dmk-concept dmk-struct)
						      ;;  4-Oct-95 by EHN -- filter multi-SEMs, create *OR*
						      (filter-smcs-meanings root (dmk-pos dmk-struct)
									    (dmk-concept dmk-struct))
						      ))))
			    result-fs)))
		  result))
    ;; add ROOT and ORTHO fields to all surviving word f-structures
    (setq result (mapcar #'(lambda (fs)
			     (append (list (list 'ROOT root))
				     ;; next var is NIL or ((ORTHO "foomeisters"))
				     surface-different-from-root 
				     fs))
			 result))
    (values result)))

#|
(defun suffix-to-form-fs (surface root cat dmk-structs features)
  (let ((result dmk-structs)
	(surface-different-from-root (if (not (string= root surface))
					 (list (list 'ortho surface)) ; then
					 nil)) ; else
	)
    ;; find all dmk entries with right category, if given
    (if cat
	(setq result
	      (mapcan #'(lambda (dmk-struct)
			  (if (eq (dmk-pos dmk-struct)
				  cat)
			      (list dmk-struct))) ; then keep it
		      result)))
    ;; make list of word f-structures from remaining dmk entries
    ;; that unify against local FORM value
    (setq result
	  (mapcan #'(lambda (dmk-struct)
		      (let (result-fs)
			(cond (features ; we must have certain features
			       (let ((*tut-operator-style* :keyword)
				     (num-dmk (assoc 'NUMBER (dmk-syn-features dmk-struct)))
				     (num-ana (assoc 'NUMBER (list features))))
				 (setq result-fs
				       (tree-unify
					(copy-tree (dmk-syn-features dmk-struct))
					(list features)))
				 ;; if both dmk and analysis have (NUMBER PL), then
				 ;; fail this reading
				 (if (and num-dmk num-ana)
				     (setq result-fs *tut-fail-value*))
				 (if (eq *tut-fail-value* result-fs)
				     (setq result-fs NIL) ; then
				     (setq result-fs 
					   (list (cons (list 'CAT (dmk-pos dmk-struct))
						       result-fs))))))
			      (t  ; no prescribed features
			       (setq result-fs 
				     (list (cons (list 'CAT (dmk-pos dmk-struct))
						 (copy-tree (dmk-syn-features dmk-struct)))))))
			;; 28-Sep-93 by pjordan modified to add concept to f-structure
			;; (terpri) (pprint result-fs) (terpri)
			(if (and result-fs (dmk-concept dmk-struct))
			    (list (append (car result-fs)
					  (list (list 'SEM ;;(dmk-concept dmk-struct)
						      ;;  4-Oct-95 by EHN -- filter multi-SEMs, create *OR*
						      (filter-smcs-meanings root (dmk-pos dmk-struct)
									    (dmk-concept dmk-struct))
						      ))))
			    result-fs)))
		  result))
    ;; add ROOT and ORTHO fields to all surviving word f-structures
    (setq result (mapcar #'(lambda (fs)
			     (append (list (list 'ROOT root))
				     ;; next var is NIL or ((ORTHO "foomeisters"))
				     surface-different-from-root 
				     fs))
			 result))
    (values result)))
|#

;;;==================================================================;

;;; Function SUFFIX-P
;;; 
;;; Is suffix a suffix of string?
;;;
;;; (suffix-p "foo" 3 "oo" 2) ==> T
;;; (suffix-p "foo" 3 "meister" 6) ==> NIL


(defun suffix-p (string str-len suffix suff-len)
  (when (<= suff-len str-len)
    (not (mismatch string suffix :from-end t :test #'char=
		   :start1 (- str-len suff-len)))))


(defun second-to-last-char (word len)
  (char word (- len 2)))

(defun third-to-last-char (word len)
  (char word (- len 3)))

(defun fourth-to-last-char (word len)
  (char word (- len 4)))

(defun fifth-to-last-char (word len)
  (char word (- len 5)))

(defun sixth-to-last-char (word len)
  (char word (- len 6)))

(defun strip-last-n-chars (word len n)
  (subseq word 0 (- len n)))


;;;==================================================================;

;;; Function PARSE-ENG-MORPH-STRING
;;; 
;;;  (PARSE-ENG-MORPH analyzes morphology of a given word,
;;;  without consulting the dictionary.
;;;
;;;
;;; Some examples:
;;;
;;; | 2 Enter PARSE-ENG-MORPH "truck"
;;; | 2 Exit PARSE-ENG-MORPH (("truck" +NIL))
;;;
;;; | 2 Enter PARSE-ENG-MORPH "parked"
;;; | 2 Exit PARSE-ENG-MORPH (("parked" +NIL) ("parke" +ED) ("park" +ED))
;;;
;;----------------------------------------------------------------------
;; 29-Jul-96 by EHN -- make this thing return something
;; additional when syl-double is found.

(defun parse-eng-morph-string (word)
  (let ((len (length word))
	result-list)
    
  ;;  Verb +S   and Noun +S
  ;;
  (when (and (< 1 len)
	     (suffix-p word len "s" 1)
  	     (not (member (second-to-last-char word len)
			  '(#\s #\z #\x))))
    
   (push (list (strip-last-n-chars word len 1) '+S) result-list))

  (when (and (< 2 len)
	     (suffix-p word len "es" 2)
	     (member (third-to-last-char word len)
		     '(#\s #\z #\h #\u #\o #\x)))
   (push (list (strip-last-n-chars word len 2) '+S) result-list))

  (when (and (< 4 len)
	     (suffix-p word len "ves" 3)
   (push (list (concatenate 'string (strip-last-n-chars word len 3) "f") '+S) result-list)))

  (when (and (< 4 len)
	     (suffix-p word len "ies" 3)
	     (not (member (fourth-to-last-char word len)
			  '(#\a #\i #\u #\e #\o))))
   (push (list (concatenate 'string (strip-last-n-chars word len 3) "y") '+S) result-list))

  ;;   Adj, Adv +ER
  ;;
  (when (and (< 3 len)
	     (suffix-p word len "er" 2))
    (push (list (strip-last-n-chars word len 2) '+ER) result-list)

    (if (char= (third-to-last-char word len)
	       (fourth-to-last-char word len))
	;; 29-Jul-96 by EHN -- syl-double.
	(push (list (strip-last-n-chars word len 3) '+ER '+DUBL) result-list) ; then
	(push (list (strip-last-n-chars word len 1) '+ER) result-list)) ; else

    (if (and (char= (third-to-last-char word len) #\k)
	    (char= (fourth-to-last-char word len) #\c))
	(push (list (strip-last-n-chars word len 3) '+ER) result-list))
    
    (if (char= (third-to-last-char word len) #\i) 
	(push (list (concatenate 'string (strip-last-n-chars word len 3) "y") '+ER) result-list)))

  ;;   Adj, Adv +EST
  ;;
  (when (and (< 4 len)
	     (suffix-p word len "est" 3))
    (push (list (strip-last-n-chars word len 3) '+EST) result-list)

   (if (eq (fourth-to-last-char word len)
	   (fifth-to-last-char word len))
       ;; 29-Jul-96 by EHN -- syl-double.
       (push (list (strip-last-n-chars word len 4) '+EST '+DUBL) result-list) ; then
       (push (list (strip-last-n-chars word len 2) '+EST) result-list)) ; else
   
   (if (and (char= (fourth-to-last-char word len) #\c)
	    (char= (fifth-to-last-char word len) #\k))
       (push (list (strip-last-n-chars word len 4) '+EST) result-list)) ; then

   (if (char= (fourth-to-last-char word len) #\i)
       (push (list (concatenate 'string (strip-last-n-chars word len 4) "y") '+EST) result-list)))
  
  ;;   Verb +ED
  ;;
  (when (and (< 3 len)
	     (suffix-p word len "ed" 2))
    (push (list (strip-last-n-chars word len 2) '+ED) result-list)

   (if (char= (third-to-last-char word len)
	      (fourth-to-last-char word len))
       ;; 29-Jul-96 by EHN -- syl-double.
       (push (list (strip-last-n-chars word len 3) '+ED '+DUBL) result-list) ; then
       (push (list (strip-last-n-chars word len 1) '+ED) result-list)) ; else

   (if (and (char= (third-to-last-char word len) #\k)
	    (char= (fourth-to-last-char word len) #\c))
       (push (list (strip-last-n-chars word len 3) '+ED) result-list))
   
   (if (char= (third-to-last-char word len) #\i)
       
       (push (list (concatenate 'string (strip-last-n-chars word len 3) "y") '+ED) result-list)))

  ;;   Verb +ING
  ;;
  (when (and (< 4 len)
	     (suffix-p word len "ing" 3))
   (push (list (strip-last-n-chars word len 3) '+ING) result-list)
   (if (char= (fourth-to-last-char word len)
	      (fifth-to-last-char word len))
       ;; 29-Jul-96 by EHN - syl-double.
       (Push (list (strip-last-n-chars word len 4) '+ING '+DUBL) result-list) ; then
       (push (list (concatenate 'string (strip-last-n-chars word len 3) "e") '+ING) result-list)) ; else

   (if (and (char= (fourth-to-last-char word len) #\k)
	    (char= (fifth-to-last-char word len) #\c))
       (push (list (strip-last-n-chars word len 4) '+ING) result-list))

   (if (char= (fourth-to-last-char word len) #\y)
       (push (list (concatenate 'string (strip-last-n-chars word len 4) "ie") '+ING) result-list)))

  ;;   Verb +INGS
  ;;     Example: paintings
  ;;
  
  (when (and (< 5 len)
	     (suffix-p word len "ings" 4))
   (push (list (strip-last-n-chars word len 4) '+INGS) result-list)
   (if (char= (fifth-to-last-char word len)
	      (sixth-to-last-char word len))
       ;; 29-Jul-96 by EHN - syl-double.
       (push (list (strip-last-n-chars word len 5) '+INGS '+DUBL) result-list) ; then
       (push (list (concatenate 'string (strip-last-n-chars word len 4) "e") '+INGS) result-list)) ; else

   (if (and (char= (fifth-to-last-char word len) #\k)
	    (char= (sixth-to-last-char word len) #\c))
       (push (list (strip-last-n-chars word len 5) '+INGS) result-list))

   (if (char= (fifth-to-last-char word len) #\y)
       (push (list (concatenate 'string (strip-last-n-chars word len 5) "ie") '+INGS) result-list)))
  
 (values (cons (list word '+NIL) result-list))))

#|
(defun parse-eng-morph-string (word)
  (let ((len (length word))
	result-list)
    
  ;;  Verb +S   and Noun +S
  ;;
  (when (and (< 1 len)
	     (suffix-p word len "s" 1)
  	     (not (member (second-to-last-char word len)
			  '(#\s #\z #\x))))
    
   (push (list (strip-last-n-chars word len 1) '+S) result-list))

  (when (and (< 2 len)
	     (suffix-p word len "es" 2)
	     (member (third-to-last-char word len)
		     '(#\s #\z #\h #\u #\o #\x)))
   (push (list (strip-last-n-chars word len 2) '+S) result-list))

  (when (and (< 4 len)
	     (suffix-p word len "ves" 3)
   (push (list (concatenate 'string (strip-last-n-chars word len 3) "f") '+S) result-list)))

  (when (and (< 4 len)
	     (suffix-p word len "ies" 3)
	     (not (member (fourth-to-last-char word len)
			  '(#\a #\i #\u #\e #\o))))
   (push (list (concatenate 'string (strip-last-n-chars word len 3) "y") '+S) result-list))

  ;;   Adj, Adv +ER
  ;;
  (when (and (< 3 len)
	     (suffix-p word len "er" 2))
    (push (list (strip-last-n-chars word len 2) '+ER) result-list)

    (if (char= (third-to-last-char word len)
	       (fourth-to-last-char word len))
	(push (list (strip-last-n-chars word len 3) '+ER) result-list) ; then
	(push (list (strip-last-n-chars word len 1) '+ER) result-list)) ; else

    (if (and (char= (third-to-last-char word len) #\k)
	    (char= (fourth-to-last-char word len) #\c))
	(push (list (strip-last-n-chars word len 3) '+ER) result-list))
    
    (if (char= (third-to-last-char word len) #\i) 
	(push (list (concatenate 'string (strip-last-n-chars word len 3) "y") '+ER) result-list)))

  ;;   Adj, Adv +EST
  ;;
  (when (and (< 4 len)
	     (suffix-p word len "est" 3))
    (push (list (strip-last-n-chars word len 3) '+EST) result-list)

   (if (eq (fourth-to-last-char word len)
	   (fifth-to-last-char word len))
       (push (list (strip-last-n-chars word len 4) '+EST) result-list) ; then
       (push (list (strip-last-n-chars word len 2) '+EST) result-list)) ; else
   
   (if (and (char= (fourth-to-last-char word len) #\c)
	    (char= (fifth-to-last-char word len) #\k))
       (push (list (strip-last-n-chars word len 4) '+EST) result-list)) ; then

   (if (char= (fourth-to-last-char word len) #\i)
       (push (list (concatenate 'string (strip-last-n-chars word len 4) "y") '+EST) result-list)))
  
  ;;   Verb +ED
  ;;
  (when (and (< 3 len)
	     (suffix-p word len "ed" 2))
    (push (list (strip-last-n-chars word len 2) '+ED) result-list)

   (if (char= (third-to-last-char word len)
	      (fourth-to-last-char word len))
       (push (list (strip-last-n-chars word len 3) '+ED) result-list) ; then
       (push (list (strip-last-n-chars word len 1) '+ED) result-list)) ; else

   (if (and (char= (third-to-last-char word len) #\k)
	    (char= (fourth-to-last-char word len) #\c))
       (push (list (strip-last-n-chars word len 3) '+ED) result-list))
   
   (if (char= (third-to-last-char word len) #\i)
       
       (push (list (concatenate 'string (strip-last-n-chars word len 3) "y") '+ED) result-list)))

  ;;   Verb +ING
  ;;
  (when (and (< 4 len)
	     (suffix-p word len "ing" 3))
   (push (list (strip-last-n-chars word len 3) '+ING) result-list)
   (if (char= (fourth-to-last-char word len)
	      (fifth-to-last-char word len))
       
       (Push (list (strip-last-n-chars word len 4) '+ING) result-list) ; then
       (push (list (concatenate 'string (strip-last-n-chars word len 3) "e") '+ING) result-list)) ; else

   (if (and (char= (fourth-to-last-char word len) #\k)
	    (char= (fifth-to-last-char word len) #\c))
       (push (list (strip-last-n-chars word len 4) '+ING) result-list))

   (if (char= (fourth-to-last-char word len) #\y)
       (push (list (concatenate 'string (strip-last-n-chars word len 4) "ie") '+ING) result-list)))

  ;;   Verb +INGS
  ;;     Example: paintings
  ;;
  
  (when (and (< 5 len)
	     (suffix-p word len "ings" 4))
   (push (list (strip-last-n-chars word len 4) '+INGS) result-list)
   (if (char= (fifth-to-last-char word len)
	      (sixth-to-last-char word len))
       
       (push (list (strip-last-n-chars word len 5) '+INGS) result-list) ; then
       (push (list (concatenate 'string (strip-last-n-chars word len 4) "e") '+INGS) result-list)) ; else

   (if (and (char= (fifth-to-last-char word len) #\k)
	    (char= (sixth-to-last-char word len) #\c))
       (push (list (strip-last-n-chars word len 5) '+INGS) result-list))

   (if (char= (fifth-to-last-char word len) #\y)
       (push (list (concatenate 'string (strip-last-n-chars word len 5) "ie") '+INGS) result-list)))
  
 (values (cons (list word '+NIL) result-list))))
|#  


;;;==================================================================;

;;; word-roots returns all the possible root strings for
;;; a surface string.
;;;
;;; input: surface string
;;; output: list of strings that could be roots of the surface string
;;;
;;; Todo: Need to get irreg words, too.


(defun word-roots (word)
  (append (mapcar #'first (parse-eng-morph-string word))
	  (eng-irreg-word-root word)))
  

;;;------------------------------------------------------------------;
;;;
;;; word-root-pos returns the first root string with the right
;;; POS for the given surface string.

(defun word-root-pos (string pos)
  (let ((roots (word-roots string)))
    (dolist (root roots)
      (if (eng-dmk-entry-cat root pos)
	  (return-from word-root-pos root)))
    ;; no root matched with the right POS
    (values NIL)))
    
		
    
