;*              Copyright 1992 Digital Equipment Corporation
;*                         All Rights Reserved
;*
;* Permission to use, copy, and modify this software and its documentation is
;* hereby granted only under the following terms and conditions.  Both the
;* above copyright notice and this permission notice must appear in all copies
;* of the software, derivative works or modified versions, and any portions
;* thereof, and both notices must appear in supporting documentation.
;*
;* Users of this software agree to the terms and conditions set forth herein,
;* and hereby grant back to Digital a non-exclusive, unrestricted, royalty-free
;* right and license under any changes, enhancements or extensions made to the
;* core functions of the software, including but not limited to those affording
;* compatibility with other hardware or software environments, but excluding
;* applications which incorporate this software.  Users further agree to use
;* their best efforts to return to Digital any such changes, enhancements or
;* extensions that they make and inform Digital of noteworthy uses of this
;* software.  Correspondence should be provided to Digital at:
;* 
;*			Director, Cambridge Research Lab
;*			Digital Equipment Corp
;*			One Kendall Square, Bldg 700
;*			Cambridge MA 02139
;* 
;* This software may be distributed (but not offered for sale or transferred
;* for compensation) to third parties, provided such third parties agree to
;* abide by the terms and conditions of this notice.
;* 
;* THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
;* WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
;* MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
;* CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
;* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
;* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
;* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
;* SOFTWARE.

; $Id: boblex.scm,v 1.16 1992/12/29 20:46:55 jmiller Exp $

(define bob-tokens
  `((add . add) (and . and) (arguments . arguments) (as . as) (be . be)
    (binary . binary) (bind . bind) (binding . binding) (binds . bind)
    (by . by) (call . call) (called . called) (case . case) (catch . catch)
    (class . class) (declare . declare) (described . described) (do . do)
    (each . each) (else . else) (elseif . elseif) (except . except)
    (exit . exit) (false . false) (for . for) (function . function)
    (generic . generic) (if . if) (in . in) (initialized . initialized)
    (initially . initially) (keywords . keywords) (left . left)
    (like . like) (method . method) (must . must) (named . called)
    (next . next) (nowhere . nowhere) (object . object) (of . of) (on . on)
    (others . others) (protect . protect) (read . read) (remaining . remaining)
    (reraise . reraise) (right . right) (set . set) (setter . setter)
    (slot . slot) (stored . stored) (subclass . subclass) (the . the)
    (then . then) (to . to) (true . true) (try . try) (unary . unary)
    (until . until) (unwind . unwind) (using . using) (values . values)
    (while . while) (with . with) (yielding . yielding)))

(define bob-special-tokens
  '((#\,  COMMA)
    (#\;  SEMICOLON)
    (#\(  OPEN-PAREN)
    (#\)  CLOSE-PAREN)
    (#\[  OPEN-BRACKET)
    (#\]  CLOSE-BRACKET)
    (#\{  OPEN-BRACE)
    (#\}  CLOSE-BRACE)
    (#\|  BITWISE-OR (#\| LOGICAL-OR))
    (#\-  MINUS)
;;    (#\/  DIVIDE) Handled very specially because of comments!
    (#\<  LESS-THAN (#\< LEFT-SHIFT) (#\= LESS-THAN-OR-EQUAL))
    (#\=  ASSIGN (#\= LOGICAL-EQUAL) (#\> CASE-TERMINATOR))
    (#\>  GREATER-THAN (#\> RIGHT-SHIFT) (#\= GREATER-THAN-OR-EQUAL))
    (#\^  XOR)
    (#\~  BITWISE-NEGATE)
    (#\!  LOGICAL-NEGATE (#\= UNEQUAL))
    (#\%  REMAINDER)
    (#\&  BITWISE-AND (#\& LOGICAL-AND))
    (#\*  MULTIPLY)
    (#\+  ADD)))

(define char-set/bob-special
  (chars->char-set (map car bob-special-tokens)))

(define char-set/bob-symbol-constituents
  ;; None of the characters which follow a special-token in the
  ;; above table are permitted as symbol constituents.  Otherwise
  ;; expand-table (below) will fail.
  (char-set-union (predicate->char-set char-alphabetic?)
		  (char-set-union
		   (predicate->char-set char-numeric?)
		   (char-set #\. #\! #\$ #\? #\~ #\_ #\:))))

(define char-set/bob-symbol-terminators
  (char-set-invert char-set/bob-symbol-constituents))

(define char-set/bob-symbol-leaders
  (char-set-difference char-set/bob-symbol-constituents
		       char-set/bob-special))

(define (bob-intern-string! string)
  (let ((length (string-length string)))
    (do ((i 0 (1+ i)))
	((= i length)
	 (string->symbol (canonicalize-string-for-symbol string)))
      (if (char=? #\_ (string-ref string i))
	  (string-set! string i #\-)))))

(define (bob/read-atom)
  (read-string char-set/bob-symbol-terminators))

(define (bob/symbol-or-keyword)
  (let* ((string (bob/read-atom))
	 (nchars (string-length string)))
    ;; Can't do intern-string! here because it changes the case!
    (cond ((= nchars 1) (bob-intern-string! string))
	  ((char=? #\: (string-ref string (- nchars 1)))
	   `(KEYWORD ,(bob-intern-string! string)))
	  (else (let loop ((i 0))
		  (cond ((= i nchars)
			 (let ((symbol (bob-intern-string! string)))
			   (cond ((assq symbol bob-tokens)
				  => (lambda (entry)
				       `(TOKEN ,(cdr entry) ,symbol)))
				 (else symbol))))
			((not (char-lower-case? (string-ref string i)))
			 (bob-intern-string! string))
			(else (loop (+ i 1)))))))))

(define bob/numeric-prefix
  (make-parse-object/numeric-prefix bob/read-atom))
(define bob/number (make-parse-object/number bob/read-atom))

(define (symbol-constituent? char)
  (and char
       (char-set-member? char-set/bob-symbol-constituents char)))

(define bob-symbol-wrappers
  '((#\< #\>)
    (#\* #\*)
    (#\% #\%)
    (#\^ #\^)
    (#\/ #\/)))

(define char-set/bob-symbol-wrappers
  (chars->char-set (map car bob-symbol-wrappers)))

(define (bob/wrapped-symbol initial-char default-token)
  (let ((terminator (cadr (assq initial-char bob-symbol-wrappers)))
	(default-symbol (bob-intern-string! (string initial-char))))
    (lambda ()
      (let* ((initial-string (bob/read-atom))
	     (next-char (parser-peek-char/eof-ok)))
	(cond ((char=? next-char terminator)
	       (discard-char)
	       (bob-intern-string! (string-append
				    (string initial-char)
				    initial-string
				    (string next-char))))
	      ((zero? (string-length initial-string))
	       `(TOKEN ,default-token ,default-symbol))
	      (else
	       (list `(TOKEN ,default-token ,default-symbol)
		     (bob-intern-string! initial-string))))))))

(define (expand table)
  (define (bob/handler entry discard?)
    ;; DISCARD? is true if the first character needs to be discarded
    (let ((first-char (car entry))
	  (entry (cdr entry)))
      (if (null? (cdr entry))
	  (let ((result `(TOKEN ,(car entry)
				,(bob-intern-string! (string first-char)))))
	    (if discard?
		(lambda () (discard-char) result)
		(lambda () result)))
	  (let ((default `(TOKEN ,(car entry)
				 ,(bob-intern-string! (string first-char))))
		(table (map (lambda (entry)
			      (cons (car entry)
				    `(TOKEN ,(cadr entry)
					    ,(bob-intern-string!
					      (string first-char
						      (car entry))))))
			    (cdr entry))))
	    (if discard?
		(lambda ()
		  (discard-char)
		  (let ((this-char (assq (parser-peek-char/eof-ok) table)))
		    (if this-char
			(begin (discard-char)
			       (cdr this-char))
			default)))
		(lambda ()
		  (let ((this-char (assq (parser-peek-char/eof-ok) table)))
		    (if this-char
			(begin
			  (discard-char)
			  (cdr this-char))
			default))))))))
  (let loop ((result '())
	     (table table))
    (if (null? table)
	result
	(let* ((entry (car table))
	       (character (car entry))
	       (default-token (cadr entry)))
	  (loop
	   (cons
	    (list character
		  (if (char-set-member? char-set/bob-symbol-wrappers
					character)
		      (let ((wrap-handler
			     (bob/wrapped-symbol character default-token))
			    (what-to-do (bob/handler entry #F)))
			(lambda ()
			  (discard-char)
			  (if (symbol-constituent? (parser-peek-char/eof-ok))
			      (wrap-handler)
			      (what-to-do))))
		      (bob/handler entry #T)))
	    result)
	   (cdr table))))))

(define bob/slash
  (let ((wrapped (bob/wrapped-symbol #\/ 'DIVIDE)))
    (lambda ()
      (discard-char)
      (let ((next (parser-peek-char/eof-ok)))
	(cond ((not next) `(TOKEN DIVIDE '/)) ; EOF
	      ((char=? next #\/) (parse-object/comment))
	      ((symbol-constituent? next) (wrapped))
	      (else `(TOKEN DIVIDE '/)))))))

(define (make-bob-lexer-table)
  (let ((table
	 (make-parser-table parse-object/undefined #F
			    thomas/special-undefined #F)))
    (for-each (lambda (entry)
		(apply parser-table/set-entry! table entry))
	      `(("#" ,parse-object/special ,collect-list/special)
		(,char-set/bob-symbol-leaders ,bob/symbol-or-keyword)
		,@(expand bob-special-tokens)
		(,char-set/digits ,bob/number)
		(("#b" "#B") ,bob/numeric-prefix)
		(("#o" "#O") ,bob/numeric-prefix)
		(("#d" "#D") ,bob/numeric-prefix)
		(("#x" "#X") ,bob/numeric-prefix)
		(,char-set/whitespace
		 ,parse-object/whitespace
		 ,collect-list/whitespace)
		("\"" ,parse-object/string-quote)
		("/" ,bob/slash)
		("\\" ,parse-object/thomas-char-quote)
		("'" ,(lambda () 
			(discard-char)
			`(TOKEN QUOTE 'QUOTE)))
		("." ,(parse-object/peculiar-identifier '...))))
    table))

(define (thomas/special-undefined)
  (parse-error "Illegal number syntax" (parser-peek-char))
  (parse-object/dispatch))

(define (btest s)
  (string->object s (make-bob-lexer-table) "EOF Object" list))

(define (lex s)
  (let ((table (make-bob-lexer-table)))
    (let loop ((s s)
	       (result '()))
      (if (zero? (string-length s))
	  (reverse result)
	  (string->object s table `(TOKEN END-OF-FILE)
	    (lambda (object offset error)
	      (if error
		  (write-line `(ERROR ,error AT ,offset INFO ,object
				SO FAR ,(reverse result)))
		  (loop (substring s offset (string-length s))
			(if (or (not (pair? object))
				(eq? (car object) 'token)
				(eq? (car object) 'keyword))
			    (cons object result)
			    (cons* (cadr object)
				   (car object)
				   result))))))))))

(define (make-bob-lexer string)

  ;; Returns a procedure that returns successive tokens lexed from
  ;; STRING each time it is called.
  ;;
  ;; The returned procedure takes one argument, a receiver which is
  ;; tail-called with:
  ;; 1. the lexed token (if any),
  ;; 2. the index of the next character to be read from STRING,
  ;; 3. any arguments passed to parse-error or #f if there was no
  ;;    error.  (The first element of this list is always a string.)

  (let ((table (make-bob-lexer-table))
	(length (string-length string))
	(offset 0))

    (lambda (receiver)
      (if (>= offset length)
	  (receiver `(TOKEN END-OF-FILE) offset #f)
	  (substring->object
	   string offset length table `(TOKEN END-OF-FILE)
	   (lambda (object new-offset error-args)
	     (set! offset new-offset)
	     (cond (error
		    (receiver object new-offset error-args))
		   ((or (not (pair? object))
			(eq? (car object) 'token)
			(eq? (car object) 'keyword))
		    (receiver object offset false))
		   (else
		    ;; When does the lexer return a list of two objects?
		    (error "substring->object returned weird Bob lexeme"
			   object)))))))))

