;;; $Id: read.scm,v 1.1.1.1 1993/06/24 10:06:03 queinnec Exp $
;;; Copyright (c) 1990-93 by Christian Queinnec. All rights reserved.
;;;oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
;;;                        LiSP2TeX
;;;   Christian Queinnec             or to:  Christian Queinnec
;;;   <queinnec@polytechnique.fr>            <Christian.Queinnec@inria.fr>
;;;   Laboratoire d'Informatique de l'X      INRIA -- Rocquencourt
;;;   Ecole Polytechnique                    Domaine de Voluceau, BP 105
;;;   91128 Palaiseau                        78153 Le Chesnay Cedex
;;;   France                                 France
;;;oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo

;;; This program is distributed in the hope that it will be useful.
;;; Use and copying of this software and preparation of derivative works
;;; based upon this software are permitted, so long as the following
;;; conditions are met:
;;;      o credit to the authors is acknowledged following current
;;;        academic behaviour
;;;      o no fees or compensation are charged for use, copies, or
;;;        access to this software
;;;      o this copyright notice is included intact.
;;; This software is made available AS IS, and no warranty is made about
;;; the software or its performance.

;;;oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
;;; This file contains a Scheme Reader which I hope is conformant to 
;;; Scheme Standard (but for float and complex numbers). I also added
;;; #\tab and #\return as whitespaces.

;;; The functions read-<something> all have a similar structure:
;;; they take a character (or eof) as first argument, the
;;; stream from which they read and they return whatever 
;;; object is read. 
;;; They are usually programmed as a test against eof followed
;;; by a big case expression driven by the current character.

;;; Whenever an end of file is found, the reader tries to finish
;;; its work and usually return this end-of-file or report an error
;;; if the read object is not complete.

;;;ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
;;; Various constants.

;;; some error messages when reading unparsable things.

(define too-much-closing-bracket
  "Superfluous closing bracket" )
(define premature-eof-in-string
  "Stream ends up while reading a string" )
(define premature-eof-in-symbol
  "Stream ends up while reading a symbol" )
(define premature-eof-in-barred-symbol
  "Stream ends up while reading a barred symbol" )
(define premature-eof-in-list
  "Stream ends up while reading a list" )
(define incorrect-dots
  "Two contiguous dots" )
(define too-long-dotted-list
  "Incorrect dotted (too long) list" )
(define too-short-dotted-list
  "Incorrect dotted (too short) list" )
(define premature-eof-in-sharp
  "Stream ends up while reading a sharp character" )
(define incorrect-character-name
  "Incorrect name of character" )
(define invalid-dispatch-character
  "Invalid dispatch character" )
(define premature-eof-in-vector
  "Stream ends up while reading a vector" )
(define dot-in-vector
  "Vectors cannot be dotted" )
(define premature-eof-in-number
  "Stream ends up while reading a number" )
(define incorrect-figure-in-number
  "Incorrect figure while reading number" )

;; Named characters that are recognized. Case is not sigificant.

(define named-characters
  (list (cons "space"   32)
        (cons "tab"      9)
        (cons "return"  13)
        (cons "newline" 10) ) )

;; The only numbers that are recognized (it is also possible to use
;; lower cases as well as to mix cases).

(define correct-figures "0123456789ABCDEF")


;;;ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
;;; This huge function creates a reader, streams are assumed to know
;;; how to answer to peek-char, read-char and read-error.

(define (make-reader peek-char read-char)

  ;; common-zone is a working area where are collected characters of
  ;; symbols or string while being scanned. This area is always reused.
  (define common-zone (make-Extensible-String 100))
  
  ;; It is reused all the time so it is necessary to reset it.
  (define (reset-common-zone)
    (set-Extensible-String-index! common-zone 0)
    common-zone )

  ;; pipe two operations in one: consume the head of the stream 
  ;; and peek the next character. At any time the first character of 
  ;; stream is the poken char.
  (define (read-then-peek-char stream)
    (read-char stream)
    (peek-char stream) )
  
  (lambda (kind stream read-error)   
    ;; If kind is 'read then read and return the read object.
    ;; If kind is 'skip then skip whitespaces until an object starts.
    ;; stream is the stream to read,
    ;; (read-error stream message . culprits) is the function to call
    ;; when a reading error occurs. Message indicates the type of the
    ;; error while culprits may hold additional informations.

    ;; Two operations in one: skip whitespaces then read an object. 
    (define (skip-and-read-object char stream)
      (read-object (skip-until-object char stream) stream) )

    ;; skip whitespace and comments until finding a character that
    ;; begins an expression, return that very character.
    (define (skip-until-object char stream)
      (if (eof-object? char)
          char
          (case char
            ((#\space #\newline #\tab #\return) ; some are not in R3RS
             (skip-until-object (read-then-peek-char stream) stream) )
            ((#\;)
             (read-after-end-of-line 
              (read-then-peek-char stream) stream skip-until-object ) )
            (else char) ) ) )
 
    ;; Read an object, the first character of which is char.
    (define (read-object char stream)
      (if (eof-object? char)
          char
          (case char
            ((#\')
             (let ((o (skip-and-read-object 
                       (read-then-peek-char stream) stream )))
               (if (eof-object? o) o `(quote ,o)) ) )
            ((#\`)
             (let ((o (skip-and-read-object 
                       (read-then-peek-char stream) stream )))
               (if (eof-object? o) o (list 'quasiquote o)) ) )
            ((#\,)
             (let ((ch (read-then-peek-char stream)))
               (case ch
                 ((#\@)  ; ,@ must be contiguous
                  (let ((o (skip-and-read-object (read-then-peek-char stream) 
                                                 stream )))
                    (if (eof-object? o) o 
                        (list 'unquote-splicing o) ) ) )
                 (else 
                  (let ((o (skip-and-read-object ch stream)))
                    (if (eof-object? o) o 
                        (list 'unquote o )) ) ) ) ) )
            ((#\()
             (skip-and-read-list (read-then-peek-char stream) stream) )
            ((#\))
             ;; consume the offending parenthesis and report an error.
             (read-char stream)		
             (read-error stream too-much-closing-bracket) )
            ((#\")
             (read-string (read-then-peek-char stream) 
                          stream
                          (reset-common-zone) ) )
            ((#\#)
             (read-sharp-object (read-then-peek-char stream) stream) )
            ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
             (read-number-or-symbol 
              char stream (reset-common-zone) #f #t ) )
            ((#\+ #\-)
             (read-number-or-symbol 
              (read-then-peek-char stream) 
              stream 
              (increment-string char (reset-common-zone))
              char 
              #f ) )
            (else
             (read-symbol char stream (reset-common-zone)) ) ) ) )

    ;; skip characters until the end of the line then resume the
    ;; fn function on the next character of the next line.
    (define (read-after-end-of-line char stream fn)
      (if (eof-object? char)
          char
          (case char
            ((#\newline)
             (fn (read-then-peek-char stream) stream) )
            (else (read-after-end-of-line 
                   (read-then-peek-char stream) stream fn )) ) ) )

    ;; Read a string. Characters are accumulated in the common area
    ;; using the increment-string function.  When the string is
    ;; finished the common area is finalized to return a string.
    (define (read-string char stream string)
      (if (eof-object? char)
          (read-error stream premature-eof-in-string)
          (case char
            ((#\") (read-char stream)
                   (finalize-string string) )
            ((#\\)
             (read-char stream)         ; consume the backslash
             (let ((ch (read-char stream)))
               (when (eof-object? ch)
                 (read-error stream premature-eof-in-string) )
               (increment-string ch string)
               (read-string (peek-char stream) stream string) ) )
            (else (increment-string char string)
                  (read-string 
                   (read-then-peek-char stream) stream string ) ) ) ) )

    ;; Read a symbol (backslash and bar are allowed).
    ;; Note that dot and quote do not finish a symbol.
    (define (read-symbol char stream symbol)
      (if (eof-object? char)
          (finalize-symbol symbol)
          (case char
            ((#\space #\newline #\tab #\return #\( #\) #\; #\")
             (finalize-symbol symbol) )
            ((#\\)
             (read-char stream)         ; consume the backslash
             (let ((ch (read-char stream)))
               (when (eof-object? ch)
                     (read-error stream premature-eof-in-symbol) )
               (increment-symbol ch symbol #t)
               (read-symbol (peek-char stream) stream symbol) ) )
            ((#\|)
             (read-barred-symbol (read-then-peek-char stream) 
                                 stream symbol ) )
            (else 
             (increment-symbol char symbol #f)
             (read-symbol
              (read-then-peek-char stream) stream symbol ) ) ) ) )

    ;; Read the part of a symbol which is between bars.
    (define (read-barred-symbol char stream symbol)
      (if (eof-object? char)
          (read-error stream premature-eof-in-barred-symbol)
          (case char
            ((#\|) 
             (let ((newchar (read-then-peek-char stream)))
               (when (eof-object? newchar)
                     (read-error stream premature-eof-in-barred-symbol) )
               (case newchar
                 ((#\|)
                  (increment-symbol newchar symbol #t)
                  (read-barred-symbol (read-then-peek-char stream)
                                      stream symbol ) )
                 (else (read-symbol newchar stream symbol)) ) ) )
            ((#\\)
             (read-char stream)         ; consume the backslash
             (let ((ch (read-char stream)))
               (when (eof-object? ch)
                 (read-error stream premature-eof-in-symbol) )
               (increment-symbol ch symbol #t)
               (read-barred-symbol 
                (peek-char stream) stream symbol ) ) )
            (else (increment-symbol char symbol #t)
                  (read-barred-symbol
                   (read-then-peek-char stream) stream symbol ) ) ) ) )

    ;; skip whitespaces to read the content of a list
    (define (skip-and-read-list char stream)
      (if (eof-object? char)
          (read-error stream premature-eof-in-list)
          (case char
            ((#\space #\newline #\tab #\return)
             (skip-and-read-list (read-then-peek-char stream) stream) )
            ((#\;)
             (read-after-end-of-line
              (read-then-peek-char stream) stream skip-and-read-list ) )
            (else (read-list char stream)) ) ) )
    
    ;; Reads a list
    (define (read-list char stream)
      (case char
        ((#\))
         (read-char stream)             ; consume the close bracket
         '() )
        ((#\.)
         (let ((ch (read-then-peek-char stream)))
           (if (eof-object? ch)
               (read-error stream premature-eof-in-list)
               (case ch
                 ((#\.)
                  (let ((other-ch (read-then-peek-char stream)))
                    (if (eof-object? other-ch)
                        (read-error stream premature-eof-in-list)
                        (case other-ch
                          ((#\.)
                           (read-char stream) ; consumes third dot
                           (let ((symbol (reset-common-zone)))
                             (increment-symbol ch symbol #t)
                             (increment-symbol ch symbol #t)
                             (increment-symbol ch symbol #t)
                             (let* ((s (finalize-symbol symbol))
                                    (d (skip-and-read-list (peek-char stream)
                                                           stream )) )
                               (if (eof-object? d)
                                   d
                                   (cons s d) ) ) ) )
                          (else (read-error stream incorrect-dots)) ) ) ) )
                 (else
                  (read-end-of-list 
                   (skip-and-read-list ch stream) 
                   stream ) ) ) ) ) )
        (else 
         (let ((a (skip-and-read-object char stream)))
           (if (eof-object? a)
               a
               (let ((d (skip-and-read-list (peek-char stream) stream)))
                 (if (eof-object? d)
                     d
                     (cons a d) ) ) ) ) ) ) )

    ;; Check if what was read is a regular end of list ie is a list
    ;; with exactly one term otherwise report an error.
    (define (read-end-of-list expressions stream)
      (if (pair? expressions)
          (if (pair? (cdr expressions))
              (read-error stream too-long-dotted-list expressions)
              (car expressions) )
          (read-error stream too-short-dotted-list expressions) ) )

    ;; Read a sharp notation
    (define (read-sharp-object char stream)
      (case char
        ((#\\)
         (read-until-delimiter
          (read-then-peek-char stream)
          stream
          (reset-common-zone)
          (lambda (chars delimiter)
            (case (string-length chars)
              ((0) (read-char stream))  ; consume the delimiter
              ((1) (string-ref chars 0))
              (else 
               (let ((value (let search ((nmchs named-characters))
                              (if (pair? nmchs)
                                  (if (string-ci=? (caar nmchs) chars)
                                      (car nmchs)
                                      (search (cdr nmchs)) )
                                  #f ) )))
                      (if value 
                          (integer->char (cdr value))
                          (read-error 
                           stream incorrect-character-name chars ) ) )) ) ) ) )
        ((#\t #\T) (read-char stream) #t)
        ((#\f #\F) (read-char stream) #f)
        ((#\b #\B)
         (read-number (read-then-peek-char stream) stream 2 0) )
        ((#\o #\O)
         (read-number (read-then-peek-char stream) stream 8 0) )
        ((#\d #\D)
         (read-number (read-then-peek-char stream) stream 10 0) )
        ((#\x #\X)
         (read-number (read-then-peek-char stream) stream 16 0) )
        ((#\()
         (let ((items
                (skip-and-read-vector (read-then-peek-char stream) stream) ))
           (apply vector items) ) )
        (else (read-char stream)
              (read-error stream invalid-dispatch-character char) ) ) )

    ;; Returns a string of all characters until a delimiter.
    ;; Useful to read a named character.
    (define (read-until-delimiter char stream string cont)
      (if (eof-object? char)
          (read-error stream premature-eof-in-sharp)
          (case char
            ((#\( #\) #\; #\" #\space #\newline #\tab #\return)
             (cont (finalize-string string) char) )
            (else (increment-string char string)
                  (read-until-delimiter
                   (read-then-peek-char stream)
                   stream
                   string
                   cont )) ) ) )
           
    ;; Skip whitespaces to read the content of a vector.
    (define (skip-and-read-vector char stream)
      (if (eof-object? char)
          (read-error stream premature-eof-in-vector)     
          (case char
            ((#\space #\newline #\tab #\return)
             (skip-and-read-vector (read-then-peek-char stream) stream) )
            ((#\;)
             (read-after-end-of-line
              (read-then-peek-char stream) stream skip-and-read-vector ) )
            (else (read-vector char stream)) ) ) )

    ;; read a vector
    (define (read-vector char stream)
      (case char
        ((#\))
         (read-char stream)
         '() )
        ((#\.)
         (read-char stream)             ; consume the dot
         (read-error stream dot-in-vector) )
        (else 
         (let ((item (skip-and-read-object char stream)))
           (cons item (skip-and-read-vector (peek-char stream) 
                                            stream )) ) ) ) )

    ;; Read a number
    (define (read-number char stream base result)
      (if (eof-object? char)
          (or result (read-error stream premature-eof-in-number))
          (let ((figure (convert-figure char base 0)))
            (if figure
                (read-number (read-then-peek-char stream) 
                             stream base (+ figure (* base result)) )
                ;; numbers are finished by the first non-figure character 
                ;; This is questionable !?
                result ) ) ) )

    ;; Convert a string to a number
    (define (convert-figure char base result)
      (if (< result base)
          (if (char-ci=? char (string-ref correct-figures result))
              result
              (convert-figure char base (+ 1 result)) )
          #f ) )

    ;; It is not yet known if we are reading a number or a symbol.
    ;; string is the extensible string where are stored characters
    ;; sign is #f or #\+ or #\- if the sign has been read
    ;; number? is #t or #f if at least one figure has been read (useful 
    ;; to read + as a symbol and +0 as zero).
    (define (read-number-or-symbol char stream string sign number?)
      (case char
        ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
         (increment-string char string)
         (read-number-or-symbol 
          (read-then-peek-char stream) stream string sign #t ) )
        ((#\space #\newline #\tab #\return #\( #\) #\;)
         (if number?
             (do ((i (if sign 1 0) (+ 1 i))
                  (char 'wait)
                  (result 0) )
                 ((= i (extensible-string-index string))
                  (case sign
                    ((#\-) (- result))
                    (else result) ) )
               (set! char (string-ref (extensible-string-characters string) i))
               (set! result (+ (convert-figure char 10 0) (* 10 result))) )
             (read-symbol char stream string) ) )
        (else
         (read-symbol char stream string) ) ) )

    ;; The final value of (make-reader ...) is
    (case kind
      ((read) (skip-and-read-object (peek-char stream) stream))
      ((skip) (skip-until-object (peek-char stream) stream)) ) ) )

;;; small tests:
;;; (define (test)(%display (%read (current-input-port))(current-output-port)))
;;; (test) a
;;; (test) 23
;;; (test) (a b)
;;; (test) ((a b))
;;; (test) "foo"
;;; (test) (a b . c)
;;; (test) #(1 2 3)
;;; (test) #t
;;; (test) #f
;;; (test) ()
;;; (test) 'foo
;;; (test) `(a ,b ,@c (d . ,e) . ,@f)
;;; (test) #\A
;;; (test) #\a
;;; (test) #\SpaCe

;;; end of reader.scm

;;; end of read.scm
