; -*- mode:     CL -*- ----------------------------------------------------- ;
; File:         zebu-driver.lisp
; Description:  Conversion to CL of the original Scheme program (by W. M Wells)
; Author:       Joachim H. Laubsch
; Created:      10-Oct-90
; Modified:     Tue Mar  9 09:05:32 1993 (Joachim H. Laubsch)
; Language:     CL
; Package:      ZEBU
; Status:       Experimental (Do Not Distribute) 
; RCS $Header: $
;
; (c) Copyright 1990, Hewlett-Packard Company
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Revisions:
; RCS $Log: $
; 22-Feb-93 (Joachim H. Laubsch)
;  if the grammar's intern-identifier attribute is true (default), an
;  Identifier will be represented as a symbol, otherwise a string
;  2-Feb-93 (Joachim H. Laubsch)
;  introduce the variable *case-sensitive* to deal with grammars whith
;  case-sensitive keywords
; 13-Jan-93 (Joachim H. Laubsch)
;  rewrote recognize-token so that (in ALL cases) keys that could start an
;  identifier will not be recognized as keys, but as identifiers.
; 27-Nov-92 (Joachim H. Laubsch)
;  Added Variable *preserve-case*
;  "If true, the case of an identifier will be preserved (default false)."
; 29-Sep-92 (Joachim H. Laubsch)
;  a one-character keyword is considered a token iff it is not
;  in identifier-start-chars or if the next character is not in
;  identifier-continue-chars
; 21-Jul-92 (Joachim H. Laubsch)
;  improved handling of NUMBER and IDENTIFIER in next-token
; 27-Apr-92 (Joachim H. Laubsch)
;  introduce *COMMENT-START*, a character that causes everything following
;      until the end-of-line to be ignored
;  introduce *COMMENT-BRACKETS*, a list of pairs of strings that designate
;      everything between them as to be ignored
; 22-Apr-92 (Joachim H. Laubsch)
;  define FILE-PARSER, a function like READ-PARSER that takes input
;  from a file instead of from a string
;  introduced :junk-allowed as argument to READ-PARSER with same meaning
;  as that keyword in READ-FROM-STRING
;  analogously in LIST-PARSER
; 15-Apr-92 (Joachim H. Laubsch)
;  introduce *IDENTIFIER-START-CHARS*
; 30-Oct-91 (Joachim H. Laubsch)
;  improved error checking in case a grammar does not use NUMBER, but the
;  parser will be given strings containing NUMBERs
; 16-Jul-91 (Joachim H. Laubsch)
;  Added a facility to deal with multiple grammars
;  lr-parse takes a third argument, a grammar
;  READ-PARSER and LIST-PARSER take a :grammar keyword argument, defaulting to
;  *current-grammar*
; 26-Jun-91 (Joachim H. Laubsch)
;  Added a proposal to distinguish String and Symbol-tokens in lexical analysis
;  of read-parser.  See comments with section
;         *string-delimiter*, *symbol-delimiter*
; 25-Apr-91 (Joachim H. Laubsch)
;  fixed bug in read-parser which caused scanner to break if a number was the
;  last constituent of a string
; 26-Mar-91 (Joachim H. Laubsch)
;  in the case where a keyword is found, but no action defined, we
;  assume it must be an identifier.  If there is an action entry for
;  an identifier, that identifier is interned from the keyword string read
; 26-Mar-91 (Joachim H. Laubsch)
;  make read-parser read these types of numbers: integer, float, rational
;  1-Mar-91 (Joachim H. Laubsch)
;  made various simple changes, based on monitoring results to speed up
;  READ-PARSER by a factor of 10
; 30-Jan-91 (Joachim H. Laubsch)
;  introduce variable: *string-delimiter*
; 17-Jan-91 (Joachim H. Laubsch)
;  introduced String syntax:  "Fred Jones" is a nll-constant
; 11-Dec-90 (Joachim H. Laubsch)
;  introduced the ZEBU package, and imported its exported symbols into USER
;  7-Dec-90 (Joachim H. Laubsch)
;  if a keyword ending in a symbol-continue-char is followed by a 
;  symbol-continue-char a keyword token is NOT recognized (but an identifier)
;  except if there would have been a single character keyword recognizing the 
;  same initial substring. E.g. ?u?foo1 is tokenized as ?u?, foo1, because
;  there is the shorter keyword alternative: ?, u?foo1
;  The principle is to give priority to the longest possible keyword.
;  (Note that agt007 or agt?x are recognized as identifiers)
; 27-Nov-90 (Joachim H. Laubsch)
;  Lexical Analysis (recognize-token) will recognize any keyword of the
;  language.  If lr-parse is given a token that is a keyword, it may not have
;  an action for it, but if this same token were regarded as an identifier,
;  there would be one.  Instead of reporting an error, lr-parse will now look 
;  first for the identifier-action.  
;    It would be best, if lr-parse could predict, whether an identifier is legal
;  in the current state and then direct recognize-token appropriately.  I should
;  come back to this, and implement that.  It would also save time.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Written by William M. Wells.  This is an example lr parser driver
;;; which uses parse table files generated by Zebu.  

(in-package "ZEBU")

(provide "zebu-driver")
(require "zebu-loader")
;;;
;;; A rudimentary lr parser driver.
;;; It has provisions for applying client supplied procedures which are
;;; associated with productions in the grammar.
;;;
;;;
;;; This code is independent of the parse table generating system,
;;; and basically stand alone,  although
;;; it needs some macros defined in other files.
;;;
(defvar *CURRENT-GRAMMAR* *NULL-Grammar*)

(defvar *disallow-packages* nil
  "If false, Zebu parses identifiers as symbols possibly qualified by a package")

(defvar *preserve-case* nil
  "If true, the case of an identifier will be preserved (default false).")

(defvar *case-sensitive* nil
  "If true, the case of a keyword matters otherwise case is ignored when \
looking for the next token (default false).")

(defvar *terminal-alist-SEQ*)

(defvar *lexer-debug* nil)
(eval-when (compile)
  (setq *lexer-debug* nil))

(defmacro if-debugging-lexer (then else)
  `,(if *lexer-debug* then else))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                  utilities
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; (upcased-subseq string from to) == (string-upcase (subseq string from to))
;; but avoids a copy
(defun upcased-subseq (string beg end)
  (declare (simple-string string) (fixnum beg end))
  (let* ((size (- end beg))
	 (R (make-sequence 'simple-string size))
	 (stringi beg))
    (declare (simple-string R) (fixnum stringi))
    (dotimes (index size)
      (setf (aref R index) (char-upcase (the character (aref string stringi))))
      (incf stringi))
    R))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                             The LR parser itself
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; symbol-stack and state-stack are the standard things for an lr parser.
;;; the client lambdas and stack are used in the following fashion:
;;;
;;;   When a shift action occurs, the instantiation of the input symbol
;;;   is pushed onto the client stack.
;;;
;;;   When a reduce action occurs, as many items as are on the lhs
;;;   of the associated production are popped from the client stack
;;;   and the corresponding client lambda is applied to the popped
;;;   items.  The result of the application is then pushed onto the 
;;;   client stack.  One may of course do whatever one wishes by side
;;;   effect.

;;; when junk-allowed, 2 values are returned:
;;;         the object found so far
;;;         the value returned by last-pos-fn
;;; last-pos-fn should be defined as a function that returns the place
;;;         before the token just returned by next-token-fn

;;; when more-allowed, no "<end of string>" error is issued but
;;; more-fn is called to extend the token-stream that next-token-fn is
;;; using.

(defun lr-parse (next-token-fn err-fn grammar
			       &optional junk-allowed last-pos-fn
			       more-allowed more-fn
			       &aux symbol-stack client-stack)
  (declare #+:ANSI-COMMON-LISP (dynamic-extent symbol-stack client-stack)
	   (type cons symbol-stack client-stack))
  (flet ((undef-action-error (token &optional index)
	   (let ((type (if index
			   (let ((e (aref (grammar-lexicon grammar) index)))
			     (if (stringp e) 'key )))))
	     (funcall
	      err-fn
	      (format
	       nil "Syntax error (action not defined for token: ~S ~@[a ~s~])"
	       token type)))))
    (let ((state-stack (list (grammar-lr-parser-start-state-index grammar)))
	  (production-info (grammar-production-info grammar))
	  (action-table (grammar-action-table grammar))
	  (goto-table (grammar-goto-table grammar))
	  (client-lambdas (grammar-client-lambdas grammar))
	  (identifier-index (grammar-identifier-index grammar))
	  (end-symbol-index (grammar-end-symbol-index grammar)))
      (declare #+:ANSI-COMMON-LISP (dynamic-extent state-stack)
	       (cons state-stack)
	       (fixnum identifier-index end-symbol-index)
	       (simple-vector action-table goto-table))
      (multiple-value-bind (input-symbol-instantiation input-symbol-index)
	  ;; we should rather predict the token (future work)!!
	  (funcall next-token-fn)
	(if-debugging (format t "~%Looking-at: ~S . ~S"
			      input-symbol-instantiation
			      (aref (grammar-lexicon grammar)
				    input-symbol-index)))
	(loop
	 (let* ((state-stack-top (car (the cons state-stack)))
		(action-table-top (aref action-table state-stack-top))
		(action-entry
		 (when (typep input-symbol-index 'FIXNUM)
		   (vec-bs-assoc (the fixnum input-symbol-index)
				 action-table-top))))
	   ;;(break "action-entry ~S" action-entry)
	   (when (null action-entry)
	     (if (eq input-symbol-index end-symbol-index)
		 (if more-allowed
		     (progn
		       (funcall more-fn)
		       (multiple-value-setq
			   (input-symbol-instantiation input-symbol-index)
			 (funcall next-token-fn))
		       (setq action-entry (vec-bs-assoc
					   (the fixnum input-symbol-index)
					   action-table-top)))
		   (undef-action-error "<end of string>"))
	       (unless (and junk-allowed
			    ;; assume that EOF was seen
			    (setq action-entry 
				  (vec-bs-assoc
				   end-symbol-index action-table-top)))
		 (or (and (setq action-entry 
				(vec-bs-assoc identifier-index
					      action-table-top))
			  (stringp input-symbol-instantiation)
			  (identifier-start-char-p
			   (aref input-symbol-instantiation 0))
			  (not (find-if-not #'identifier-continue-char-p
					    input-symbol-instantiation
					    :start 1))
			  (setq input-symbol-instantiation
				(intern
				 (if *preserve-case*
				     (the string input-symbol-instantiation)
				   (string-upcase
				    (the string input-symbol-instantiation))))
				input-symbol-index identifier-index))
		     (undef-action-error input-symbol-instantiation
					 input-symbol-index)))))
	   (let ((ae-cdr (cdr (the cons action-entry))))
	     (case (car (the cons ae-cdr))
	       (:S			; Shift.
		(if-debugging (format t "~%shift to ~S" (cadr ae-cdr)))
		(push input-symbol-index symbol-stack)
		(push (cadr ae-cdr) state-stack)
		(push input-symbol-instantiation client-stack)
		(multiple-value-setq
		    (input-symbol-instantiation input-symbol-index)
		  (funcall next-token-fn))
		(if-debugging
		 (format t "~%Looking-at: ~S . ~S"
			 input-symbol-instantiation input-symbol-index)))
	       (:R			; Reduce.
		(let* ((prod-index (cadr ae-cdr))
		       (p (aref production-info prod-index))
		       ;; p = <lhs-symbol-index> . <production-length>
		       (prod-lhs (car (the cons p)))
		       (prod-ln (cdr (the cons p)))
		       (client-lambda (aref client-lambdas prod-index)))
		  (if-debugging (format t "~%Reduce ~S" prod-index))
		  ;; optimize simple cases
		  (case prod-ln
		    (0			; Apply the client lambda and store the result.
		     (if-debugging (format t "~%; Calling ~S" client-lambda))
		     (push (funcall client-lambda) client-stack)
		     (if-debugging 
		      (let ((R (car client-stack)))
			(format t "~%; -> ~S : ~S" R (type-of R)))))
		    (1			; Apply the client lambda and store the result.
		     (if-debugging (format t "~%; Applying ~S to ~S"
					   client-lambda (car client-stack)))
		     (when client-lambda
		       (setf (car client-stack)
			     (funcall client-lambda (car client-stack))))
		     (setq symbol-stack (cdr symbol-stack)
			   state-stack  (cdr state-stack))
		     (if-debugging 
		      (let ((R (car client-stack)))
			(format t "~%; -> ~S : ~S" R (type-of R)))))
		    (t (let (constituents)
			 (dotimes (i prod-ln) 
			   (setq symbol-stack (cdr symbol-stack)
				 state-stack  (cdr state-stack))
			   (push (pop client-stack) constituents))
			 ;; Apply the client lambda and store the result.
			 (if-debugging (format t "~%; Applying ~S to ~S"
					       client-lambda constituents))
			 (push (apply client-lambda ; action
				      constituents)
			       client-stack)
			 (if-debugging 
			  (let ((R (car client-stack)))
			    (format t "~%; -> ~S : ~S" R (type-of R)))))))
		  (push prod-lhs symbol-stack) ; Push lhs of production.
		  (let ((goto (cdr (the cons
					(vec-bs-assoc
					 prod-lhs
					 (aref goto-table (car state-stack)))))))
		    (if (null goto) 
			(funcall err-fn "table error? goto not defined!"))
		    (push goto state-stack))))
	       (:A
		;; Accept on END symbol.
		(if (= input-symbol-index end-symbol-index)
		    (return (car client-stack))
		  (if junk-allowed
		      (return (values (car client-stack)
				      (when last-pos-fn (funcall last-pos-fn))))
		    (funcall err-fn "extra input?"))))
	       (T (funcall err-fn
			   (format nil
				   "bogus action: ~S" (car ae-cdr))))))))))))


;;; A function for looking up table entries using binary search
;;; the vector elements are the assoc key and should be in increasing order.

(defun vec-bs-assoc (num vec)
  (declare (type fixnum num) (type vector vec))
  (labels ((vec-bs-assoc-aux (start end)
	     (declare (type fixnum start end))
	     (let ((start-entry (aref vec start)))
	       (declare (type cons start-entry))
	       (cond ((= num (the fixnum (car start-entry))) start-entry)
		     ((= start end) nil)
		     (T (let ((mid (floor (+ start end) 2)))
			  (declare (type fixnum mid))
			  (if (> num (the fixnum (car (aref vec mid))))
			      (vec-bs-assoc-aux (1+ mid) end)
			    (vec-bs-assoc-aux start mid))))))))
    (let ((last (1- (length (the vector vec)))))
      (declare (type fixnum last))
      (if (or (< num (the fixnum (car (aref vec 0))))
	      (> num (the fixnum (car (aref vec last)))))
	  nil
	(vec-bs-assoc-aux 0 last)))))


;;; Figure out to which element of the lexicon a token corresponds.
;;; This gets a little complicated for terminal symbols which can
;;; vary at parsing time, for example, identifiers and numbers.  The way
;;; these "preterminals" are handled in this driver is as follows:
;;; If a token passes the CL test PARSE-NUMBER, and the argument number-index
;;; isn't false, then number-index is treated as representing its category.
;;; Otherwise, if the token appears exactly in the lexicon, then it is
;;; given the category of the lexicon item.  Otherwise it is assumed
;;; to be an instance of the terminal IDENTIFIER, whose presence in the
;;; lexicon is indicated by a non false value for the id-index argument.
;;; If the token isn't explicitly in the lexicon, and id-index is false,
;;; then an error is signalled.
;;; 


;;; number-index should be the index of the grammar symbol which stands
;;; for numbers, otherwise it should be false if numbers don't appear
;;; in the grammar.
;;;
;;; id-index should be the index of the grammar symbol which stands
;;; for identifiers, otherwise it should be false if identifiers don't
;;; appear in the grammar.


(defun categorize (token grammar)
  (let ((category 
	 (if (numberp token)
	     (progn (if-debugging
		     (assert number-index ()
			     "A number was seen in the token stream"))
		    (grammar-number-index grammar))
           (let ((terminal-associations
		  (elt (grammar-terminal-alist-SEQ grammar)
		       (char-code (char-downcase
				   (the character (aref (string token) 0)))))))
             (if terminal-associations
		 (let ((terminal-association (assoc token terminal-associations
						    :test #'equal)))
		   (if terminal-association
		       (cdr terminal-association)
		     (grammar-identifier-index grammar)))
	       (grammar-identifier-index grammar))))))
    (values token category)))

(proclaim '(inline end-of-tokens-category))
(defun end-of-tokens-category (grammar)
  (values Nil (grammar-end-symbol-index grammar)))

;;; This implements a parser which gets its tokens from the supplied list.
;;; It uses the parsing engine lr-parse which is defined above.  It also
;;; uses the function categorize to classify tokens according to the 
;;; lexicon.

(defun list-parser (token-list &key (grammar *current-grammar*) junk-allowed)
  (let ((last-position token-list)
        token1)
    (check-type token-list list)
    (lr-parse
     ;; This lambda is the tokenizer supplied to the parsing engine:
     #'(lambda ()
         (if (null token-list)
             (end-of-tokens-category grammar)
           (progn
             (setq last-position token-list
                   token1 (pop token-list))
             (categorize token1 grammar))))
     ;; This lambda is the error function supplied to the parsing engine:
     #'(lambda (string)
	 (error "~S~% Remaining tokens: ~S~{ ~S~}"
		string token1 token-list))
     grammar
     junk-allowed
     ;; Function that returns the remaining unparsed token-list
     #'(lambda () last-position))))
    
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                 read-parser
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; This implements a parser which gets its tokens from the Lisp function
;;; read.
;;; It uses the parsing engine lr-parse which is defined above.  It also
;;; uses the function categorize to classify tokens according to the 
;;; lexicon.  It will signal the end of input to the parser when it
;;; if it reads the end of file.

(defun read-parser (string &key (error-fn #'error)
			   (print-parse-errors t)
			   (grammar *current-grammar*)
			   (start 0)
			   junk-allowed
			   more-allowed
			   more-fn)
  (declare (string string))
  (check-type string string)
  (check-type grammar grammar)
  (let ((number-index (grammar-number-index grammar))
	(identifier-index (grammar-identifier-index grammar))
	(string-index (grammar-string-index grammar))
	(string-ln (length (the string string)))
	(last-pos 0)
	(pos start)
	(end-symbol-index (grammar-end-symbol-index grammar))
	(*identifier-start-chars-V* (grammar-identifier-start-chars-V grammar))
	(*identifier-continue-chars-V* (grammar-identifier-continue-chars-V grammar))
	(*terminal-alist-SEQ* (grammar-terminal-alist-SEQ grammar))
	(intern-identifier (grammar-identifier-index grammar))
	(white-space      (grammar-white-space grammar))
	(string-delimiter (grammar-string-delimiter grammar))
	(symbol-delimiter (grammar-symbol-delimiter grammar))
	(lex-cat-map (grammar-lex-cat-map grammar))
	token)
    (declare (fixnum string-ln pos last-pos)
	     (special *identifier-continue-chars-V*
		      *identifier-start-chars-V*))
    (flet ((white-space-p (char)
	     (member (the character char) white-space
		     :test #'char=))
	   (digit-seq? (dec end)
	     (and dec
		  (or (>= end string-ln)
		      (let ((c (aref string end)))
			(or		; (white-space-p c)
			 (not (identifier-continue-char-p c)))))))
	   (new-fraction (num den places)
	     (values (float (+ num (/ den (expt 10 places))))
		     number-index)))
      ;; The tokenizer supplied to the parsing engine:	   
      (flet ((next-token ()
	       ;; skip initial blanks
	       (setq last-pos pos
		     pos (or (position-if-not #'white-space-p string :start pos)
			     string-ln))
	       (when (>= pos string-ln)
		 (return-from next-token (values Nil end-symbol-index)))

	       ;; scan regular expressions first
	       (dolist (lex-cat-pair lex-cat-map)
		 (let ((new-pos (funcall (the function (cdr lex-cat-pair))
					 string pos string-ln)))
		   (when new-pos
		     (let ((instance  (subseq string pos new-pos)))
		       (setq pos new-pos)
		       (return-from next-token
			 (values instance (car lex-cat-pair)))))))

	       ;; read symbol, string, or number
	       ;; foo : symbol, 'foo' : symbol, "foo" : string, 3/4 : number
	       ;; recognize a number: <digit>+ [ "." <digit>+ ]
	       ;;                     <digit>+ "/" <digit>+
	       (when number-index
		 (multiple-value-bind (number end)
		     (parse-integer string :start pos :junk-allowed t)
		   (if (not number)
		       ;; the case .<integer>
		       (when (eql (aref string pos) '#\.)
			 (multiple-value-bind (dec end)
			     (parse-integer string
					    :start (1+ pos) :junk-allowed t)
			   (when (digit-seq? dec end)
			     (let ((places (- end (1+ pos))))
			       (setq pos end)
			       (return-from next-token
				 (new-fraction 0 dec places))))))
		     (progn
		       (when (>= end string-ln)
			 (setq pos end)
			 (return-from next-token (values number number-index)))
		       (let ((c (aref string end))
			     (p (1+ end)))
			 (case c
			   (#\/ (multiple-value-bind (denom end)
				    (parse-integer string
						   :start p :junk-allowed t)
				  (when denom
				    (setq pos end)
				    (return-from next-token
				      (values (/ number denom) number-index))))
				(setq pos end)
				(return-from next-token
				  (values number number-index)))
			   (#\. (multiple-value-bind (dec end)
				    (parse-integer string
						   :start p :junk-allowed t)
				  (when (digit-seq? dec end)
				    (let ((places (- end p)))
				      (setq pos end)
				      (return-from next-token
					(new-fraction number dec places)))))
				(setq pos p)
				(return-from next-token
				  (values number number-index)))
			   (t (unless (identifier-continue-char-p c)
				(setq pos end)
				(return-from next-token
				  (values number number-index))))))))))
	       ;; recognize a grammar keyword
	       (multiple-value-bind (token-association token-length)
		   (recognize-token string pos)
		 (when token-association
		   ;; token recognized
		   (setq pos (+ pos token-length)
			 token (car token-association))
		   (return-from next-token
		     (values token (cdr token-association)))))
	       ;; recognize an identifier or string
	       (let ((char (aref string pos)) c)
		 (declare (character char c))
		 (flet ((parse-delimited-id (delimiter kind)
			  ;; when successful set token and pos!!
			  (flet ((eof-error ()
				   (return-from read-parser
				     (funcall
				      error-fn
				      (format
				       nil "~a delimiter ~S expected"
				       kind delimiter)))))
			    (when (char= char delimiter)
			      (do ((p (incf pos) (1+ p))
				   (escaped? nil (char= c #\\)))
				  (nil)
				(declare (fixnum p))
				(when (= p string-ln)
				  (if more-fn
				      (setq string
					    (concatenate
					     'string
					     string (string #\Newline)
					     (funcall more-fn #'eof-error))
					    string-ln (length string))
				    (eof-error)))
				(setq c (aref string p))
				(when (and (char= c delimiter)
					   (not escaped?))
				  (setq token (subseq string pos p)
					pos (1+ p))
				  (return-from parse-delimited-id t)))))))
		   (when (and
			  identifier-index
			  (parse-delimited-id symbol-delimiter "Symbol"))
		     (return-from next-token
		       (values (intern token) identifier-index)))
		   (when (and string-index
			      (parse-delimited-id
			       string-delimiter "String"))
		     (return-from next-token
		       (values token string-index))))

		 ;; Does char start an identifier?
		 (flet ((parse-id ()
			  ;; Any char not in *identifier-continue-chars* terminates
			  (do ((p (1+ pos) (1+ p))) 
			      ((or (= p string-ln)
				   (not (identifier-continue-char-p (aref string p))))
			       (prog1 (if *preserve-case*
					  (subseq string pos p)
					(upcased-subseq string pos p))
				 (setq pos p)))
			    (declare (fixnum p)))))
		   (let ((Id-String
			  (block Identifier
			    (when (identifier-start-char-p char)
			      (let ((id1 (parse-id)))
				(when (or (= pos string-ln)
					  (char/= (aref string pos) #\:)
					  *disallow-packages*)
				  (return-from Identifier id1))
				;; more chars follow the ":" ?
				(let ((package (find-package id1)))
				  (unless package
				    (return-from Identifier id1))
				  ;; <package-symbol>: ...
				  (let* ((p (1+ pos))
					 (next (aref string p)))
				    (when (char= next #\:)
				      (setq next (aref string (incf p))))
				    (unless (identifier-start-char-p next)
				      (return-from Identifier id1))
				    (setq pos p)
				    (return-from next-token
				      (intern (the simple-string (parse-id)) package))))))
			    ;; Symbol in keyword package ?
			    (when (and (char= char #\:)
				       (identifier-start-char-p
					(aref string (incf pos))))
			      (return-from next-token
				(values (intern (THE SIMPLE-STRING
						     (parse-id))
						*keyword-package*)
					identifier-index))))))
		     (when Id-String
		       (return-from next-token
			 (values (if intern-identifier
				     (intern Id-String) Id-String)
				 identifier-index)))))
		 (funcall error-fn
			  (format nil "Identifier or String expected at ~S"
				  (subseq string pos))))))
	(apply
	 #'lr-parse
	 (if-debugging-lexer		;  for testing
	  #'(lambda () (multiple-value-bind (token id)
			   (next-token)
			 (format t "~%New Token: ~S . ~S Pos: ~S"
				 token id pos)
			 (values token id)))
	  #'next-token)
	 ;; This is the error function supplied to the parsing engine:
	 #'(lambda (msg)
	     (when print-parse-errors
	       (format t "~%Last token read: ~S~%Remaining: ~A~@[~A ...~]~%"
		       token
		       (subseq string pos)
		       (when more-allowed (funcall more-fn))))
	     (funcall error-fn msg))
	 grammar
	 junk-allowed
	 #'(lambda () last-pos)
	 (when more-allowed
	   (list more-allowed		; then more-fn will supply more
		 #'(lambda ()
		     (setq string (funcall more-fn)
			   string-ln (length (the string string))
			   pos 0)))))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                file-parser 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; parse expressions in GRAMMAR reading from FILE
;; returns: a list of the parse-results, i.e. what would have been 
;;          returned by read-parser

(defvar *comment-brackets* '(("#|" . "|#")) )
(defvar *comment-start* #\; )

(defun file-parser (file &key 
			 (error-fn #'error)
			 (print-parse-errors t)
			 (grammar *current-grammar*)
			 (verbose *load-verbose*))
  (with-open-file (s (merge-pathnames file) :direction :input)
    (file-parser-aux s error-fn print-parse-errors grammar verbose)))

(defun file-parser-aux (stream error-fn print-parse-errors grammar verbose
			 &aux R (eof (cons nil nil)))
  (labels ((skip-lines (stream end)
	     ;; ignore lines until end is found
	     (let ((l (read-line stream nil eof)))
	       (if (stringp l)
		   (let ((p (search end l)))
		     (if p
			 (let ((l-rest (string-left-trim
					'(#\Space #\Tab)
					(subseq l (+ p (length end))))))
			   (if (string= l-rest "")
			       (next-line stream)
			     l-rest))
		       (skip-lines stream end)))
		 l)))
	   (next-line (stream)		; ignore comments
	     (let ((l (read-line stream nil eof)))
	       (when verbose (terpri) (princ l))
	       (if (stringp l)
		   (let ((l-length (length (setq l (string-left-trim
						    '(#\Space #\Tab) l)))))
		     (if (zerop l-length)
			 (next-line stream)
		       (if (char= *comment-start* (aref l 0))
			   (next-line stream)
			 ;; does this line start a comment
			 (dolist (comment *comment-brackets* l)
			   (let* ((start (car comment))
				  (start-length (length start)))
			     (when (and
				    (>= l-length start-length)
				    (string= l start :end1 start-length))
			       ;; a comment found
			       (return
				 (setq l (skip-lines
					  stream
					  (cdr comment))))))))))
		 l))))
    (do ((line (next-line stream)))
	((eq line eof) (nreverse R))
      (multiple-value-bind (expr rest)
	  (read-parser line
		       :error-fn error-fn
		       :print-parse-errors print-parse-errors
		       :grammar grammar
		       :junk-allowed t
		       :more-allowed t
		       :more-fn #'(lambda (&optional error-fn)
				    (setq line (next-line stream))
				    (if (eq line eof)
					(if error-fn
					    (funcall error-fn)
					  (error "Reached end of file ~S while parsing"
					       stream))
				      line)))
	;; (when verbose (let ((*print-structure* t)) (print expr)))
	(setq line (if rest
		       (subseq line rest)
		     (next-line stream)))
	(push expr R)))))

;----------------------------------------------------------------------------;
; recognize-token
;----------------
; 

(defun recognize-token (string pos
  &aux (string-length (length (the string string))))
  ;; Does any of the terminal symbols of the grammar start STRING at POS?
  ;; In case it does, it must be the longest one
  ;; the ordering of terminal-alist makes sure we find the longest keyword
  ;; first
  (declare (string string) (fixnum string-length))
  (let ((max-token-length (- string-length (the integer pos))))
    (declare (fixnum max-token-length))
    (flet ((recognize-token-aux (ta)
	     (do ((ta-rest ta (cdr (the cons ta-rest))))
		 ((null ta-rest) nil)
	       (let* ((token-association (car (the cons ta-rest)))
		      (terminal-token (car token-association))
		      (token-length (length (the string terminal-token))))
		 (declare (fixnum token-length) (string terminal-token))
		 (when (and (>= max-token-length token-length)
			    (let ((string-end (+ pos token-length)))
			      (declare (fixnum string-end))
			      (and
			       (if *case-sensitive*
				   (string= (the string terminal-token)
					    string
					    :start2 pos :end2 string-end)
				 (string-equal (the string terminal-token)
					       string
					       :start2 pos :end2 string-end))
			       ;; If we recognize a keyword, that could start
			       ;; an identifier, the following char must
			       ;; not also be a symbol-continue-char.
			       ;; If it is (e.g. "agent1") and there exists
			       ;; no shorter key that would accept this,
			       ;; then we will not recognize the key ("agent")
			       ;; but this leads us to recognize in "?u?x" the
			       ;; token "?u?" instead of "?"
			       
			       ;; if we are at the end of the string,
			       ;; we accept
			       (or (not (< string-end string-length))
				   ;; if a identifier-continue-char doesn't
				   ;; follow, we also accept
				   (not (identifier-continue-char-p
					 (aref string string-end)))
				   ;; if the key does not start with
				   ;; an identifier-start-char we accept
				   (not (identifier-start-char-p
					 (aref terminal-token 0)))
				   ;; if any of the remaining chars of the key
				   ;; is not a identifier-continue-char,
				   ;; we also accept
				   (find-if-not #'identifier-continue-char-p
						terminal-token
						:start 1)))))
		   (return-from recognize-token-aux
		     (values token-association token-length)))))))
      (recognize-token-aux
       (elt *terminal-alist-SEQ*
	    (char-code (char-downcase (the character (aref string pos)))))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                               End of driver.l
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
