(in-package 'user)


;;; 
;;; Patches for XCHART
;;;


;;; - added time measurement
;;; - changed the fs-slot used to identify the category: cat is used by
;;;   the grammar, nont is used by the parser;
;;; - grammar rules are stored in a hash table;
;;; - arcs are stored in a hash table;


;;----------------------------------------------------------------------
;; PARSE
;;
;; The top-level parser call.
;;
;; INPUT: a list of input symbols, which must be in *LEXICON*
;; GRAMMAR: a list of rule structures, such as *GRAMMAR*
;; SEARCHMODE: One of 'dfs (depth-first-search), 'bfs (breadth-first-search)
;; QUITMODE: if non-NIL, causes parser to quit as soon as it finds the
;;   first chart entry that covers the whole input.

(defun parse (input &key
		    (search *default-searchmode*)
		    (quit *default-quitmode*)
		    (level 999))
  (let* ((position 1)	
	 (final-position (1+ (length input)))
	 (operations 0)
	 (node-counter 0)
	 (start-time (get-internal-real-time))
	 (parse-count 0)
	 (arcs (make-hash-table :test #'equal))
	 nodeid nodefs chart agenda new-arc word entry constituent start end
	 arc-continues arc-ends rule-starts rule-ends time-spent
	 item)
    (setf *parse-value* nil *chart* nil *global-counter* 0)
    (loop
     (when (null agenda)
	   (cond ((null input)
		  (setq time-spent (- (get-internal-real-time) start-time))
		  (chart-trace "~% ~20a No input, quitting [~a operations, ~D sec time used]" item operations (/ time-spent 1000000.0))
		  (setq *chart* chart)
		  (setq *arcs* arcs)
		  (setq *parse-value* (parse-trees final-position chart))
		  (cond (*print-parse-tree*
			 (cond (*parse-value*
				(when *chart-trace* (terpri))
				(dolist (parse *parse-value* (terpri))
					(format t "~%Parse[~a]:" (incf parse-count))
					(pprint parse)
					(terpri)))
			       (t (format t "~%[Chart contains no full parses, looking for minimal cover.]~%")
				  (dolist (entry (minimal-cover (top-level-entries chart)))
				    (format t "~%FS[~a,~a,~a]:~%"
					    (entry-category entry)
					    (entry-start entry) (entry-end entry)
					    )
				    (pns (entry-fs entry))))))
			(*print-parse-fs*
			 (format t "~% ~D sec time used~%" (/ time-spent 1000000.0))
			 (parse-fs final-position chart)))
		  (return (length *parse-value*)))
		 (t
		  (chart-trace "~% ~20a Reading from input ~a" 'EMPTY input)
		  (incf position)
		  (dolist (reading (lookup-word (setq word (pop input))))
			  (setq entry (make-entry 
				       :start    (1- position) 
				       :end      position
;;;				       :category (second (assoc 'cat reading))
				       :category (second (assoc 'nont reading))
				       :id   (incf node-counter)
				       :lexeme   word
				       :fs reading
				       ))
			  (chart-trace "~% ~20a Entering ~a (~a from ~a to ~a) [agenda ~a]" 
				       'EMPTY (entry-category entry) (entry-lexeme entry)
				       (entry-start entry) (entry-end entry) 
				       (if (eq search 'dfs) "top" "bottom")
				       )
			  (incf operations)
			  (if (eq search 'dfs)
			      (push entry agenda)
			    (setq agenda (nconc agenda (list entry))))))))
     (incf *global-counter*)
     (when *trace-arcs*
	   (dump-arcs arcs))
     (setq item (pop agenda))
     (setf start (entry-start item)
	   end (entry-end item)
	   constituent (entry-category item)
	   nodeid (entry-id item)
	   nodefs (entry-fs item))
     ;; As soon as we've popped off the next item in the agenda, check
     ;; to see if it's a final node; quit if we're supposed to, before
     ;; performing any additional operations, but make sure we put the
     ;; item onto the chart before we quit.
     (when (and (= start 1)
		(= end final-position))
	   (cond (quit
		  (setq time-spent (- (get-internal-real-time) start-time))
		  (chart-trace "~% ~20a ** Covers the input, quitting [~a operations, ~D time used]" 
			       item operations (/ time-spent 1000000.0))
		  (push item chart)
		  (setq *chart* chart)
		  (setq *parse-value* (parse-trees final-position chart))
		  (cond (*print-parse-tree*
			(cond (*parse-value*
			       (when *chart-trace* (terpri))
			       (dolist (parse *parse-value* (terpri))
				       (format t "~%Parse[~a]:" (incf parse-count))
				       (pprint parse)
				       (terpri)))
			      (t (format t "~%[Chart contains no full parses.]~%"))))
			(*print-parse-fs*
			 (format t "~% ~D sec time used ~%" (/ time-spent 1000000.0))
			 (parse-fs final-position chart)))
		  (return (length *parse-value*)))
		 (t
		  (chart-trace "~% ~20a ** Covers the input, continuing...[~a operations]" 
			       item operations)
		  )))
     (setq rule-starts (rules-started-by constituent level))
     (chart-trace "~% ~20a Rules started-by: ~a" item (length rule-starts))
     (dolist (rule rule-starts)
	     (setq new-arc (make-arc 
			  :start start
			  :end end
			  :index 1 
			  :lhs (rule-lhs rule) 
			  :rhs (rule-rhs rule) 
			  :children (list nodeid)
			  :unify (rule-unify rule)
			  :strategy (rule-strategy rule)
			  ))
	     (chart-trace "~% ~20a Starts rule ~a -> ~a from ~a to ~a" 
			  item (rule-lhs rule) (insert-index 1 (rule-rhs rule)) start end)
	     (incf operations)
	     (let* ((key `(,end ,(nth (arc-index new-arc) (arc-rhs new-arc))))
		    (out (gethash key arcs)))
	       (if (eq search 'dfs)
		   (setf (gethash key arcs)
			 (push new-arc out))
		 (setf (gethash key arcs) 
		       (nconc out (list new-arc))))))
     (setq rule-ends (rules-completed-by constituent level))
     (chart-trace "~% ~20a Rules completed-by: ~a" item (length rule-ends))
     (dolist (rule rule-ends)
             (let* (; (child-fs nil)
		    (last-fs nodefs)
		    (fs (funcall (rule-unify rule) last-fs)))
	       (setq entry (make-entry
			    :start start
			    :end end
			    :category (rule-lhs rule) 
			    :id (incf node-counter) 
			    :children (list nodeid)
			    :fs fs))
	       (cond (fs
		      (chart-trace "~% ~20a Completes rule ~a -> ~a from ~a to ~a" 
				   item (rule-lhs rule) (insert-index 1 (rule-rhs rule)) start end)
		      (chart-trace "~% ~20a Entering ~a from ~a to ~a [agenda ~a]" 
				   item (rule-lhs rule) start end
				   (if (eq search 'dfs) "top" "bottom")
				   )
		      (incf operations)
		      (if (or (eq (rule-strategy rule) :dfs)
			      (and (not (rule-strategy rule)) (eq search 'dfs)))
			  (push entry agenda)
			(setq agenda (nconc agenda (list entry)))))
		     (t 
		      (chart-trace "~% ~20a Completes rule ~a -> ~a from ~a to ~a **UNIFY FAILED**" 
				   item (rule-lhs rule) (insert-index 1 (rule-rhs rule)) start end)))
	     ))
     (incf operations)
     ;; Always push processed entries onto the front of the chart.
     (push item chart)
     (setq arc-continues (arcs-continued-by start constituent arcs))
     (chart-trace "~% ~20a Arcs continued-by: ~a" item (length arc-continues))
     (dolist (arc arc-continues)
	     (setq new-arc (make-arc
			    :start (arc-start arc) 
			    :end (entry-end item) ;; 11-Oct-96 by EHN (1+ (arc-end arc)) 
			    :lhs (arc-lhs arc) 
			    :rhs (arc-rhs arc)
			    :index (1+ (arc-index arc))
			    :children (append (arc-children arc)
					      (list nodeid))
			    :unify (arc-unify arc)
			    :strategy (arc-strategy arc)
			    ))
	     (chart-trace "~% ~20a Continues arc ~a -> ~a from ~a to ~a"
			  item (arc-lhs arc) 
			  (insert-index (1+ (arc-index arc)) (arc-rhs arc)) 
			  (arc-start arc) (arc-end arc))
	     (incf operations)
	     (let* ((key `(,end ,(nth (arc-index new-arc) (arc-rhs new-arc))))
		    (out (gethash key arcs)))
	       (if (or (eq (arc-strategy arc) :dfs)
		       (and (not (arc-strategy arc)) (eq search 'dfs)))
		   (setf (gethash key arcs) 
			 (push new-arc out))
		 (setf (gethash key arcs)
		     (nconc out (list new-arc))))))
     (setq arc-ends (arcs-completed-by start constituent arcs))
     (chart-trace "~% ~20a Arcs completed-by: ~a" item (length arc-ends))
     (dolist (arc arc-ends)
             (let* ((child-fs 
		     (mapcar #'(lambda (x)
				 (entry-fs (find x chart :key #'entry-id)))
			     (arc-children arc)))
		    (last-fs nodefs)
		    (fs (apply (arc-unify arc)
			       (append child-fs (list last-fs)))))
	     (cond ((not (or (eq '*fail* fs) (null fs)))
		    (setq entry (make-entry 
				 :start (arc-start arc) 
				 :end end
				 :category 
				 (let ((lhs (arc-lhs arc)) wpos)
				   (when (eq *wildcard* lhs)
					 (setq wpos (position *wildcard* (arc-rhs arc)))
					 (setq lhs (entry-category (find (nth wpos (arc-children arc))
									  chart :key #'entry-id))))
				   lhs)
				 :id (incf node-counter)
				 :children (append (arc-children arc) 
						   (list nodeid))
				 :fs fs))
		    (chart-trace "~% ~20a Completes arc ~a -> ~a from ~a to ~a"
				 item (arc-lhs arc) 
				 (insert-index (arc-index arc) (arc-rhs arc))
				 (arc-start arc) (arc-end arc))
		    (chart-trace "~% ~20a Entering ~a from ~a to ~a [agenda ~a]" 
				 item (arc-lhs arc) (arc-start arc) end
				 (if (eq search 'dfs) "top" "bottom"))
		    (incf operations)
		    (if (or (eq (arc-strategy arc) :dfs)
			    (and (not (arc-strategy arc))(eq search 'dfs)))
			(push entry agenda)
		      (setq agenda (nconc agenda (list entry)))))
		   (t (chart-trace "~% ~20a Completes arc ~a -> ~a from ~a to ~a **~a FAILED**"
				   item (arc-lhs arc)
				 (insert-index (arc-index arc) (arc-rhs arc))
				 (arc-start arc) (arc-end arc) (arc-unify arc)
				   ))))))))

;;----------------------------------------------------------------------



(defun compgra (x)
  (let ((fullname (concatenate 'string x ".gra"))
	(outfile (concatenate 'string x ".lisp"))
	(*print-pretty* nil)
	(count 0)
	fn fnbody rule level strategy)
    (with-open-file (out outfile :direction :output :if-exists :supersede)
		    (format out "(DEFVAR *GRAMMAR-LIST*)~%(SETQ *GRAMMAR-LIST* NIL)~%(DEFVAR *GRAMMAR*)~%(SETQ *GRAMMAR* (make-hash-table))~%")
		    (with-open-file (in fullname :direction :input)
				    (loop
				     (setq rule (read in nil in))
				     (when (eq rule in)
					   (return))
				     (incf count)
				     (setq fn (format nil "GRAMMARF-~a" count))
				     (setq fnbody
					   (compile-augmentation (length (fourth rule))
								 (fifth rule)))
				     (setf (first fnbody) fn)
				     (setf fnbody (cons 'defun fnbody))
				     (format out "~a~%" fnbody)
				     (cond ((listp (first rule))
					    (setq level (first (first rule)))
					    (setq strategy (second (first rule))))
					   (t (setq level (first rule))))
				     (format out "(SETQ *GRAMMAR-LIST* (NCONC *GRAMMAR-LIST* (LIST (MAKE-RULE :STRATEGY ~s :LEVEL ~s :LHS '~s :RHS '~s :UNIFY '~a))))~%"
					     strategy level (second rule) (fourth rule) fn)
				     ))
		    (format out "(mapc #'(lambda (item) 
	    (let ((rhs (first (rule-rhs item))))
	      (let ((out (gethash rhs *GRAMMAR*)))
		(setf (gethash rhs *GRAMMAR*)
		      (NCONC out (list item))))))
	*GRAMMAR-LIST*)"))))


;;----------------------------------------------------------------------
;; RULES-STARTED-BY
;;
;; Function which returns a list of the grammar rules which are
;; started by the given constituent.

(defun rules-started-by (constituent highest-level)
  (let* ((wild (gethash *wildcard* *GRAMMAR*))
	 (rest (gethash constituent *GRAMMAR*))
	 (all (nconc wild rest)))
    (remove-if-not #'(lambda (rule)
		       (and
			;; Level is low enough
			(<= (rule-level rule) highest-level)
			;; There are other elements in the RHS to be found
			(rest (rule-rhs rule))))
		   all)))


(defun rules-completed-by (constituent highest-level)
  (let* ((wild (gethash *wildcard* *GRAMMAR*))
	 (rest (gethash constituent *GRAMMAR*))
	 (all (nconc wild rest)))
    (remove-if-not #'(lambda (rule)
		       (and
			;; Level is low enough
			(<= (rule-level rule) highest-level)
			;; There are no other elements in the RHS to find
			(null (rest (rule-rhs rule)))))
		   all)))




;;----------------------------------------------------------------------
;; ARCS-CONTINUED-BY
;;
;; Function which returns a list of the active arcs ending at END
;; which are continued by the given constituent.

(defun arcs-continued-by (end constituent arcs)
  (let* ((wild (gethash `(,end ,*wildcard*) arcs))
	 (rest (gethash `(,end ,constituent) arcs))
	 (all (nconc wild rest)))
    (remove-if-not #'(lambda (arc)
			;; There are more symbols on the arc RHS (so
			;; we're not completing the arc yet)
			(nthcdr (1+ (arc-index arc)) (arc-rhs arc)))
		   all)))

;;----------------------------------------------------------------------
;; ARCS-COMPLETED-BY
;;
;; Function which returns a list of the active arcs ending at END
;; which are continued by the given constituent.

(defun arcs-completed-by (end constituent arcs)
  (let* ((wild (gethash `(,end ,*wildcard*) arcs))
	 (rest (gethash `(,end ,constituent) arcs))
	 (all (nconc wild rest)))
    (remove-if-not #'(lambda (arc)
			(null (nthcdr (1+ (arc-index arc)) (arc-rhs arc))))
		   all)))


(defun dump-arc (arcs) 
  (format t "No implementation for dumping arcs in this version!!"))




;;;
;;;   DMK lookup for xchart
;;; 

;;; the following definition replaces the one that comes with xchart;
;;; it uses the DMK instead of the toy lexicon

(defun lookup-word (word)
  (let ((num (check-if-number word)))
    (if num `((,@num (nont <word>)))
      (let ((dmk-output (parse-eng-word (string-downcase 
					 (symbol-name word)))))
	(if (eq '*OR* (first dmk-output))
	    (mapcar #'(lambda (fs) `(,@fs (nont <word>))) (rest dmk-output))
	  `((,@dmk-output (nont <word>))))))))
	
;;; p1 tokenizes and parses a string

(defun p1 (string)
   (parse (butlast (first (first (tokenize string))))))


