;;; -*- Mode: Lisp; Package: PNODE -*-

;;; $Header: /afs/athena.mit.edu/course/other/allp/nlp/tools/RCS/pnode.lisp,v 3.3 92/07/08 16:29:07 sfelshin Exp $

;;; Copyright (c) 1986-88, 1991-92 Massachusetts Institute of Technology.
;;; All rights reserved.
;;; Today's code was brought to you by the letter T and the symbol NIL
;;; and by the Athena Language Learning Project.

(in-package :pnode)

;;; Non-graphical parse tree printing and debugging tools.
(provide-module :pnode)

#||
(defpackage :pnode
  #.`(:use ,@*base-packages*))
||#

#||
(global '(pnode pcf *pnode-snords* *pnode-new-bindings* *pnode-old-bindings*
	  *pnode-attend* *pnode-underlying-strings* *pnode-features*)
||#

(require-module :useful :macros t)
#||(use-package :useful)||#

(require-module :iter :macros t)
#||(use-package :iter)||#

(require-module :char-map)
(require-module :transliterate :macros t)
#||(use-package :transliterate)||#

(require-module :feat-support :read-macros t)
(require-module :features :compile nil)
#||(use-package :features)||#

(require-module :snag-support)
(require-module :snags)
#||(use-package :snags)||#

(require-module :pstates :accessors t)
;;; We need pstate-support for at least PS-TOP, which is an inline.  If
;;; we compile module pnode in a non-parsing lisp, PS-TOP won't be compiled
;;; inline.  There must be a better way.
(require-module :pstate-support :inlines t :when (load-subsystem-p :parsing))
#||(use-package :parser)||#

(require-module :cf-structs :accessors t)
(require-module :cf-snags)		; for AKO-DRIFT
#||(use-package :cf)||#

(require-module :nords :accessors t)
(require-module :nord-support)
(require-module :snord)
#||(use-package :grammar)||#

(require-module :transfer-functions)
#||(use-package :transfer-functions)||#

;;; For *GEN-BOX*.
(require-module :gen-structs :when (load-subsystem-p :generation))
#||(use-package :generation)||#


;;;; ================================================================
;;;; Tree printing.
;;;; ================================================================

;;; PNODE and PCF could be much more simply rewritten using the pretty
;;; printer.  I am not doing so in case this system is ever run in a Common
;;; Lisp with no pretty printer.  Any decent environment will make
;;; graphical display possible, so these functions will only be used in
;;; impoverished lisp environments.  But with the pretty printer, it would
;;; be easy to add display of snag names, features, and all that other
;;; good stuff.

;;; Don't reuse the gnode variables so that this can be run without gnode
;;; being loaded.
(defvar *pnode-snords* t)
(defvar *pnode-new-bindings* t)
(defvar *pnode-old-bindings* t)
(defvar *pnode-attend* nil)
(defvar *pnode-features* nil)

(defvar *pnode-underlying-strings* nil)


;;; ================================================================
;;; Subscripts for coindexing.

;;; Copied from and identical to code in modules gnode-support and gnode.

;;; The table is used by various functions but they have their own
;;; algorithms for assigning subscripts, based on their local definitions
;;; of "coindexed".
(defvar *subscript-table* (make-hash-table :test #'eq))
(defvar *current-subscript* -1)

(defun node-subscript (node)
  (gethash node *subscript-table*))

(defun reset-subscripts ()
  (clrhash *subscript-table*)
  (setq *current-subscript* -1))

(defun subscript-char (node)
  (code-char (+ (char-code #\i) (node-subscript node))))

(defun assign-subscripts (node snord-box)
  (declare (type nord node) (type (or null snord-box) snord-box))
  (when (and *pnode-snords* snord-box)
    (setq node (snord node snord-box)))
  (let (binder)
    (when (and *pnode-new-bindings* snord-box)
      (setq binder (sb-immediate-binder node snord-box)))
    (when (and (not binder) *pnode-old-bindings* (node-p node)
	       (node-binding node))
      (setq binder (node-binding node)))
    (when binder
      (when (and *pnode-snords* snord-box)
	(setq binder (snord binder snord-box)))
      (if (and (gethash node *subscript-table*)
	       (gethash binder *subscript-table*))
	  ;; This solves a problem where four nodes are ordered
	  ;;   A B C D
	  ;; and D is bound to B, B to A, and C to D.  Before, when we
	  ;; hit B, it and A would get the subscript i.	 When we hit
	  ;; C, it and D would get j.  When we hit D, its subscript
	  ;; (and thus B's as well) would get bashed to j.
	  (let ((right (min (gethash node *subscript-table*)
			    (gethash binder *subscript-table*)))
		(wrong (max (gethash node *subscript-table*)
			    (gethash binder *subscript-table*))))
	    (maphash #'(lambda (key elt)
			 (when (eq elt wrong)
			   (setf (gethash key *subscript-table*) right)))
		     *subscript-table*))
	(let ((subscript (or (gethash node *subscript-table*)
			     (gethash binder *subscript-table*)
			     (incf *current-subscript*))))
	  (setf (gethash node *subscript-table*) subscript)
	  (setf (gethash binder *subscript-table*) subscript)))))
  (when (node-p node)
    (dolist (child (node-children node))
      (assign-subscripts child snord-box))))


;;; ================================================================
;;; Handle snags.

;;; Straight from module gnode.
(defun boring-snag-p (snag)
  (and (not *pnode-attend*)
       (or (typep (cdr snag) 'ignore-snag)
	   (typep (cdr snag) 'ako-drift))))

;;; Cribbed from module gnode, with changes.
(defun my-cf-snags (node snord-box current-pred)
  (declare (type nord node) (type (or null snord-box) snord-box))
  (when (and *pnode-snords* snord-box)
    (setq node (snord node snord-box)))
  (let ((cf (node-cf node)))
    (when (and cf
	       (not (eq (case-frame-pred cf) current-pred))
	       (case-frame-tmappings cf))
      (remove-if
	#'boring-snag-p
	(tmapping-snags (first (case-frame-tmappings cf)))))))

(defun my-nord-snags (node snord-box)
  (declare (type nord node) (type (or null snord-box) snord-box))
  (when (and *pnode-snords* snord-box)
    (setq node (snord node snord-box)))
  (remove-if #'boring-snag-p (nord-snags node)))

;;; Based on module gnode-support's LOCAL-PENALTY, but only returns penalty
;;; and tallies both node and cf snags.
(defun local-penalty (nord snord-box current-pred)
  (declare (list snags) (special *reported-snags*))
  (let ((penalty 0))
    (declare (fixnum penalty))
    (dolist (snag (my-nord-snags nord snord-box))
      (when (not (member (cdr snag) *reported-snags* :test #'eq))
	(push (cdr snag) *reported-snags*)
	(incf penalty (car snag))))
    (when (node-p nord)
      (dolist (snag (my-cf-snags nord snord-box current-pred))
	(when (not (member (cdr snag) *reported-snags* :test #'eq))
	  (push (cdr snag) *reported-snags*)
	  (incf penalty (car snag)))))
    penalty))


;;; ================================================================

(defiter pstate-nodes (ps)
  (labels ((down (c)
	     (when c
	       (down (cddr c))
	       (yield (car c)))))
    (down (cdr (ps-children ps)))))


;;; This function bares a striking similarity to module gnode-support's
;;; WORD-STRING-AND-FACE.
(defun word-string (word snord-box)
  (declare (type word word))
  (cond ((if *pnode-underlying-strings*
	     (plusp (length (word-underlying-string word)))
	   (plusp (length (word-surface-string word))))
	 (transliterate-string
	  (if *pnode-underlying-strings*
	      (word-underlying-string word)
	    (word-surface-string word))
	  lexicon-internal lexicon-external))
	((and (boundp '!pwo) (labeled-p word !pwo))
	 "pro")
	((and *pnode-new-bindings*
	      snord-box
	      (sb-immediate-binder word snord-box))
	 (cond
	   ((labeled-p word !trace)
	    "t")
	   (t
	    ;; This should never happen!
	    "PRO")))
	;; Prefer imeanings on generation because dmeaning slot must be
	;; simple so imeaning is more interesting/accurate.  Could look
	;; at derivation, but this is D- or S-structure, not
	;; M-structure.
	((and (gword-p word)
	      (gword-imeaning word))
	 (if (consp (gword-imeaning word))
	     (format nil "~(~{~A~^ ~}~)" (gword-imeaning word))
	   (string-downcase (gword-imeaning word))))
	((word-dmeaning word)
	 (string-downcase (word-dmeaning word)))
	;; Any better ideas?
	(t
	 (string-downcase (word-type word)))))


(declaim (ftype (function (nord (or null snord-box)) fixnum) my-flat))

(defun my-flat (nord snord-box)
  (etypecase nord
    (word
     (+ (length (word-string nord snord-box))
	(if (node-subscript nord)
	    3
	  0)
        (if *pnode-features*
          (the fixnum
               (1+ (length
                    (with-output-to-string (stream)
                      (format-features stream (node-features nord))))))
          0)))
    (node
     ;; (print (local-penalty nord snord-box (node-pred nord)))
     (let* ((penalty (local-penalty nord snord-box (node-pred nord)))
	    (size (the fixnum
		       (+ (the fixnum (length (symbol-name (node-type nord))))
			  (if (plusp penalty)
			      (the fixnum
				   ;; Add 1 for the space before the penalty.
				   (1+ #+ccl-2 (truncate (log penalty 10))
                                    #-ccl-2 (the (values fixnum t)
                                                 (truncate (log penalty 10)))))
			    0)
			  (if (node-subscript nord)
			      3
			    0)
			  ;; Add 2 for the open and close brackets.
			  2))))
       (declare (fixnum size))
       (when *pnode-features*
         (setq size
               (the fixnum
                    (+ size 1
                       (length
                        (with-output-to-string (stream)
                          (format-features stream (node-features nord))))))))
       (dolist (child (node-children nord))
	 ;; Add 1 for the space before the child.
	 (setq size (+ size (my-flat child snord-box) 1)))
       size))))

(declaim (fixnum *right-margin*))
(defparameter *right-margin* 80)

;;; These attributes controlled color and flashing in GCLisp.
(defparameter *bracket-attribute* 11)
(defparameter *text-attribute* 15)
(defparameter *type-attribute* 14)

;;; Display node type or word string followed by adjacent subscript in
;;; parens, if any.  Display node penalty before node, if positive, but
;;; don't display word penalty.  Display children after node, with entire
;;; node output wrapped in square brackets.
(defun pnode (nodes &key
 		    ((:snords *pnode-snords*) *pnode-snords*)
		    ((:underlying-strings *pnode-underlying-strings*)
		     *pnode-underlying-strings*)
		    ((:old-bindings *pnode-old-bindings*)
		     *pnode-old-bindings*)
		    ((:new-bindings *pnode-new-bindings*)
		     *pnode-new-bindings*)
		    ;; May be null, a snord-box, or a pstate.
		    (box (if (eq *direction* :generation)
			     *gen-box*))
		    &aux (current-column 0))
  (declare (fixnum current-column))
  (unless (listp nodes)
    (setq nodes (list nodes)))
  (labels
      ((pn (node current-pred extra)
	 (when (and *pnode-snords* box)
	   (setq node (snord node box)))
	 (etypecase node
	   (word
	    (let ((string (word-string node box)))
	      (declare (string string))
	      (with-attribute *text-attribute*
		(write-string (word-string node box)))
	      (the fixnum (incf current-column
				(the fixnum (length string))))
	      (when (node-subscript node)
		(write-char #\()
		(write-char (subscript-char node))
		(write-char #\))
		(the fixnum (incf current-column 3)))
              (when *pnode-features*
                (write-char #\ )
                (format-features t (node-features node)))))
	   (node
	    ;; Print the node's type and penalty on this line whether they
	    ;; fit or not.  If they don't fit, there's nowhere else to put
	    ;; them.
	    (with-attribute *bracket-attribute*
	      (write-char #\[))
	    (the fixnum (incf current-column))
	    (let ((string (symbol-name (node-type node)))
		  (penalty (local-penalty node box current-pred))
		  ;; LET this before we go incrementing the current column.
		  ;; We won't need it if the node will fit on this line but
		  ;; it's too much trouble to worry about that and not much
		  ;; trouble to let it.
		  ;; Indenting by 2 seems a nice amount.
		  (left-margin (the fixnum (+ current-column 2)))
		  (cf (node-cf node)))
	      (declare (fixnum penalty left-margin) (string string))
	      ;; Penalty may be zero even though snags are present.
	      ;; Nothing is displayed.
	      (when (plusp penalty)
		(with-attribute *snag-color*
		  (write penalty))
		(write-char #\space)
		(the fixnum
		     (incf current-column
			   (the fixnum (1+ #+ccl-2 (truncate (log penalty 10))
                                        #-ccl-2 (the (values fixnum t)
                                                     (truncate
                                                      (log penalty 10))))))))
	      (with-attribute *type-attribute*
		(write-string string))
	      (when (node-subscript node)
		(write-char #\()
		(write-char (subscript-char node))
		(write-char #\))
		(the fixnum (incf current-column 3)))
	      (the fixnum (incf current-column
				(the fixnum (length string))))
	      ;; Put the children on the same line only if the entire node,
	      ;; plus all the close brackets that come after it, all fit.
	      (cond
		;; Won't fit on this line.
		((> (+ current-column (my-flat node box) extra)
		    *right-margin*)
                 (when *pnode-features*
                   (terpri)
                   (setq current-column 0)
                   (print-spaces left-margin)
		   (the fixnum (incf current-column left-margin))
                   (format-features t (node-features node)))
		 (do ((kids (node-children node) (rest kids)))
		     ((endp kids))
		   (terpri)
		   (setq current-column 0)
		   (print-spaces left-margin)
		   (the fixnum (incf current-column left-margin))
		   (pn (first kids)
		       (and cf (case-frame-pred cf))
		       (if (endp (rest kids))
			   ;; If this is the last kid, it needs to fit
			   ;; the parent's bracket after it.
			   (+ extra 1)
			 0))))
		;; Will fit on this line.
		(t
                 (when *pnode-features*
                   (write-char #\ )
                   (format-features t (node-features node)))
		 (dolist (kid (node-children node))
		   (write-char #\ )
		   (the fixnum (incf current-column))
		   (pn kid (and cf (case-frame-pred cf)) 0)))))
	    (with-attribute *bracket-attribute*
	      (write-char #\]))
	    (the fixnum (incf current-column))))))
    (dolist (node nodes)
      (let ((pbox box))
	(when (pstate-p node)
	  (setq pbox node)
	  (when (and (= (length (ps-children pbox)) 3)
		     (ps-current-word pbox)
		     (eq (word-type (ps-current-word pbox))
			 :end-of-input))
	    ;; The parse appears to be done, so only display the
	    ;; single node.
	    (setq node (ps-top pbox))))
	(reset-subscripts)
	(let ((*reported-snags* ()))
	  (declare (special *reported-snags*))
	  (etypecase node
	    (nord
	     (assign-subscripts node pbox)
	     (terpri)
	     (setq current-column 0)
	     (pn node nil 0))
	    (pstate
	     (iterate (n (pstate-nodes node))
	       (assign-subscripts n pbox))
	     (setq node (make-node :type :parser-stack
				   :children
				   (let ((temp '()))
				     (iterate (n (pstate-nodes node))
				       (push n temp))
				     (let ((n (ps-current-word node)))
				       (when n
					 (assign-subscripts n pbox)
					 (push n temp)))
				     (nreverse temp))))
	     (pn node nil 0))))))
    (reset-subscripts)
    nodes))


;;; Doesn't handle coindexing.  Doesn't print snag names.  Missing a few
;;; other fancy features that GCF has.
(defun pcf (nodes &key box &aux (true-box box))
  (declare (type (or null snord-box) box true-box))
  (labels
      ((pcf-terpri (left-margin)
	 (terpri)
	 (print-spaces left-margin))
       (pcf-meaning (left-margin prefix symbol)
	 (pcf-terpri left-margin)
	 (write-string prefix)
	 (write-string (string-downcase symbol)))
       (pcf-prin1 (left-margin prefix symbol)
	 (pcf-terpri left-margin)
	 (write-string prefix)
	 (prin1 symbol))
       (pcf2 (nord left-margin)
	 (when (and *pnode-snords* true-box)
	   (setq nord (snord nord true-box)))
	 (etypecase nord
	   (word
	    (pcf-meaning left-margin "" (word-dmeaning nord)))
	   (node
	    (let ((cf (node-cf nord))) 
	      (when (and (null cf)
			 *pnode-new-bindings*
			 true-box)
		;; If theta-binder is PRO, still won't have CF, but that's
		;; okay.
		(setq nord (theta-binder nord true-box)
		      cf (node-cf nord))) 
	      (cond
		(cf
		 (let ((pred (case-frame-pred cf)))
		   ;; Print dmeanings of predicate and ancillary case frame
		   ;; stuff.
		   (pcf-meaning left-margin "PRED: " (known-meaning pred))
		   (let ((rel (case-frame-rel cf))
			 (spec (case-frame-spec cf)))
		     (when rel
		       (pcf-meaning left-margin "REL: " (word-dmeaning rel)))
		     (when spec
		       (pcf-terpri left-margin)
		       (write-string "SPEC: ")
		       (if (node-p spec)
			   (write-features (node-features spec))
			 (write-string (string-downcase
					(word-dmeaning spec))))))
		   (when (case-frame-mods cf)
		     (pcf-terpri left-margin)
		     (write-string "MODS: ")
		     (dolist (node (reverse (case-frame-mods cf)))
		       (pcf2 node (+ left-margin 2))))
		   (when (case-frame-tmappings cf)
		     (pcf-tmapping (first (case-frame-tmappings cf))
				   left-margin))))
		;; PRO isn't supposed to have a case frame so pass it
		;; through to the next clause.
		((and (labeled-p nord !pro)
		      *pnode-old-bindings*
		      (node-binding nord))
		 (pcf2 (node-binding nord) (+ left-margin 2)))
		;; Node with no case frame.
		(t
		 (pcf-terpri left-margin)
		 ;; Use prin1 to keep the colon.
		 (prin1 (node-type nord))))))))
       (pcf-tmapping (tmapping left-margin)
	 (when (plusp (tmapping-penalty tmapping))
	   (pcf-prin1 left-margin "PENALTY: " (tmapping-penalty tmapping)))
	 (pcf-prin1 left-margin "VOICE: "
		    (theta-grid-voice (tmapping-grid tmapping)))
	 (pcf-meaning left-margin "MEANING: " (tmapping-meaning tmapping))
	 (when (tmapping-alist tmapping)
	   (pcf-terpri left-margin)
	   (write-string "ARGS: "))
	 (incf left-margin 2)
	 (dolist (am (tmapping-alist tmapping))
	   (let ((node (arg-match-node am))
		 (arg-spec (arg-match-arg-spec am))
		 (role (arg-match-theta-role am)))
	     (pcf-prin1 left-margin "ROLE: " role)
	     (let ((left-margin (+ left-margin 2)))
	       (when (arg-spec-case arg-spec)
		 (pcf-prin1 left-margin "CASE: " (arg-spec-case arg-spec)))
	       (when (arg-spec-type arg-spec)
		 (pcf-prin1 left-margin "TYPE: " (arg-spec-type arg-spec)))
	       (if (free-arg-spec-p arg-spec)
		   (when (free-arg-spec-features arg-spec)
		     (pcf-terpri left-margin)
		     (write-string "FEATURES: ")
		     (write-features (arg-spec-features arg-spec)))
		 (pcf-meaning left-margin "FIXED: "
			      (let ((nord (fixed-arg-spec-nord arg-spec)))
				(if (word-p nord)
				    (word-dmeaning nord)
				  "..."))))
	       (when (arg-spec-ako arg-spec)
		 (pcf-prin1 left-margin "AKO: " (arg-spec-ako arg-spec)))
	       (when (arg-spec-not-ako arg-spec)
		 (pcf-prin1 left-margin "NOT-AKO: "
			    (arg-spec-not-ako arg-spec)))
	       (pcf2 node (+ left-margin 2)))))
	 (dolist (misfit (tmapping-misfits tmapping))
	   (let ((node (misfit-node misfit)))
	     (pcf-prin1 left-margin "MISFIT, case " (misfit-case misfit))
	     (write-string ": ")
	     (pcf2 node (+ left-margin 2))))
	 (when (listp (tmapping-postponed tmapping))
	   (dolist (post (tmapping-postponed tmapping))
	     (pcf-prin1 left-margin "POSTPONED, case "
			(cf::postponement-case post))
	     (write-string ": ???")
	     (pcf2 (cf::postponement-node post) (+ left-margin 2))))))
    (unless (listp nodes)
      (setq nodes (list nodes)))
    (dolist (node nodes)
      (when (pstate-p node)
	(setq true-box node)
	(when (and (= (length (ps-children true-box)) 3)
		   (ps-current-word true-box)
		   (eq (word-type (ps-current-word true-box))
		       :end-of-input))
	  ;; The parse appears to be done, so only display the
	  ;; single node.
	  (setq node (ps-top true-box))))
      (case (nord-type node)
	(:paragraph
	 (pcf-terpri 0)
	 (write-string "PARAGRAPH: ")
	 (dolist (child (node-children node))
	   ;; Spanish production looks like
	   ;;   PARAGRAPH => SENTENCE* FINAL-SENTENCE
	   ;; so first child may be :empty -- specially don't print
	   ;; any case frame for this one, but for convenience, just
	   ;; don't print cf of any empty child.
	   (unless (eq (nord-type child) :empty)
	     (pcf2 child 2))))
	(t
	 (let ((*reported-snags* ()))
	   (declare (special *reported-snags*))
	   (pcf2 node 0)))))))
