;;;             Copyright (C) 1989, by William M. Wells III
;;;                         All Rights Reserved
;;;     Permission is granted for unrestricted non-commercial use.

(declare (usual-integrations))

;;; Stuff to read in a file containing a grammar.

;;; About the lisp representations of grammars in files:
;;; non terminals are represented by lisp symbols, terminals by
;;; lisp symbols, or strings. for example:
;;; A ::= B | C | "foo" | "c" | <the-empty-string>
;;;
;;; would be encoded:

;;; (A B)
;;; (lambda (b) ...)
;;;
;;; (A C)
;;; (lambda (c) ...)
;;;
;;; (A "foo")
;;; (lambda (foo) ...)
;;;
;;; (A "c")
;;; (lambda (c) ...)
;;;
;;; (A)
;;; (lambda () ...)

;;; The file should contain alternating representations
;;; of productions and associated parsing actions.  
;;; Each production is represented by a list.
;;; The first element of each such list represents the left hand
;;;  side of the production.
;;; The rest of each list stands for the right hand side of the
;;; production.
;;; The start symbol of the grammar will be the lhs of the first
;;; production encountered.  
;;; The symbol AUGMENTED-START is reserved and will automatically appear in
;;; a production deriving the start symbol.
;;; The symbol THE-EMPTY-STRING is also reserved. 
;;;
;;; The parsing action functions are used by the parser driver to
;;; effect syntax directed translation of the input text.  
;;; The action functions are not referenced in the parse table
;;; construction process.  As far as table construction is concerned, 
;;; they may be arbitrary forms which can be read by the function (read).
;;; The expression #f could be used as place-holder, for example.
;;;
;;; Use load-grammar to internalize a grammar in the above syntax.
;;; productions holds a list of all the productions.
;;; lambdas holds a list of all of the associated lambdas (in reverse order)
;;; non-terminals holds a list of all the non-terminals.
;;; Each non-terminal symbols has a list of the productions it
;;; appears in the left hand side of under its own-productions
;;; property.
;;; g-symbol-alist holds an alist whose cars are the string or symbol
;;;   which is read from the grammar, and whose cdrs hold corresponding
;;;   g-symbol structures; the order is in the reverse sense of symbol-array.

(define symbols) ; a list of the grammar symbols
(define symbol-array) ; indexed by the symbol's index, of g-symbols
(define productions)
(define lambdas)
(define production-count)
(define g-symbol-count)
(define g-symbol-alist)
(define start-symbol)
(define empty-string-g-symbol)
(define augmented-start-g-symbol)
(define the-end-g-symbol)

(define (initialize-grammar)
  (set! symbols '())
  (set! productions '())
  (set! lambdas '())
  (set! production-count 0)
  (set! g-symbol-count 0)
  (set! g-symbol-alist '())
  (set! start-symbol '())
  )

;;; This is sort of like interning.  returns a g-symbol. 
;;; eqv string-or-symbols -> return identical g-symbols.

(define (lookup-g-symbol string-or-symbol)
  (let ((whatever (assoc string-or-symbol g-symbol-alist)))
    (if whatever (cdr whatever)
	#f)))

(define (process-symbol string-or-symbol)
  (let ((symbol (lookup-g-symbol string-or-symbol)))
    (if (not symbol)
	(begin
	  (set! symbol
		(new-g-symbol
		 (if (string? string-or-symbol) string-or-symbol
		     (symbol->string string-or-symbol))
		 (post-inc g-symbol-count)))
	  (push (cons string-or-symbol symbol) g-symbol-alist)
	  (push symbol symbols)))
    symbol))



;;; About the internal representation of productions: they are
;;; a list of the production index followed by the lhs
;;; and then the rhs symbols represented as g-symbols
;;; They should have been represented with structs...

(define (lhs prod) (cadr prod))
(define (rhs prod) (cddr prod))


(define (production-index production)
  (car production))

;;; Do various things, fixing up global data structures and
;;; fields of grammar symbols.  A bit sleazy: start-symbol being #f
;;; is used to detect the first production.

(define (process-production input-production)
  (process-production-internal input-production #f))

(define (process-production-internal input-production internal-use?)
  (let ((symbols (map process-symbol input-production)))
    (if (not start-symbol) 
	(begin (set! start-symbol (car symbols))
	       (display "start symbols is: ")
	       (display (g-symbol-print-name start-symbol))
	       (newline)
	       (process-production-internal
		(list 'AUGMENTED-START (car input-production))
		#t)))
    (let ((production (cons (post-inc production-count) symbols)))
      (if (and (eq? (lhs production) augmented-start-g-symbol)
	       (not internal-use?))
	  (error "Sorry, AUGMENTED-START is a reserved grammer symbol" #f))
      (push production productions)
      (g-symbol-add-production (lhs production) production)
      (let ((rhs-symbol-set (make-oset 'order-fn g-symbol-order-function)))
	(for-each (lambda (gs)
		    (oset-insert! gs rhs-symbol-set))
		  (rhs production))
	(oset-for-each (lambda (gs)
			 (set-slot! (g-symbol-rhs-productions gs)
				    (cons production 
					  (g-symbol-rhs-productions gs))))
		       rhs-symbol-set)))))


(define (read-em-in )
  (let ((sxp (read))) 	     ; Read the production
    (if (eof-object? sxp) 'done
	(begin (process-production sxp)
               (push (read) lambdas)  ; Read and save the associated lambda.
	       (read-em-in)))))

;;; Internalize a grammar in the lisp syntax described above.
;;; Set up data structures as described above.
;;; Every grammar interns the empty string as a grammar symbol

(define (load-grammar filename)
  (initialize-grammar)
  (set! empty-string-g-symbol (process-symbol 'the-empty-string))
  (set! augmented-start-g-symbol (process-symbol 'augmented-start))
  (set! the-end-g-symbol (process-symbol 'the-end-g-symbol))
  (set! start-symbol #f)
  (display "reading grammar from ") (display filename) (display ", ")
  (with-input-from-file filename
    read-em-in)
  (display production-count) (display " productions,  ")
  (display g-symbol-count) (display " symbols") (newline)
  (set! symbol-array (list->vector (reverse symbols)))
  (if (null? start-symbol) (error "no start symbol" #f)))

(define (print-production prod)
  (display (car prod))
  (display ": ")
  (display (g-symbol-print-name (lhs prod)))
  (display " -> ")
  (for-each (lambda (x) (display (g-symbol-print-name x)) (display " "))
	(rhs prod)))

(define (print-productions )
  (for-each
   (lambda (x) (print-production x) (newline))
   (reverse productions)))

(define (print-symbols )
  (for-each (lambda (sym)
	  (display (g-symbol-index sym))
	  (display ": ")
	  (display (g-symbol-print-name sym))
	  (newline))
	(reverse symbols)))

(define (print-own-productions sym)
  (for-each (lambda (x) (print-production x) (newline))
	(g-symbol-own-productions sym)))

(define (print-rhs-productions sym)
  (for-each (lambda (x) (print-production x) (newline))
	(g-symbol-rhs-productions sym)))


(define (cruise-symbols )
  (for-each (lambda (sym)
	      (newline)
	      (display (g-symbol-index sym))
	      (display ": ")
	      (display (g-symbol-print-name sym))
	      (newline)
	      (if (g-symbol-own-productions sym)
		  (begin
		    (display "own productions:") (newline)
		    (print-own-productions sym)))
	      (if (g-symbol-rhs-productions sym)
		  (begin
		    (display "rhs productions:") (newline)
		    (print-rhs-productions sym)))
	      (display "----------------------------")
	      )
	    (reverse symbols)))

(define (cruise-symbols-2 )
  (newline)
  (for-each (lambda (sym)
	  (display (g-symbol-index sym))
	  (display ": ")
	  (display (g-symbol-print-name sym))
	  (newline)
	  )
	(vector->list symbol-array)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; test:
(comment-out
 (load "loadgram")
 (load-grammar "ex1.grm")
 (print-symbols)
 (cruise-symbols)
 (cruise-symbols-2)
 (print-productions)
)

;;; PC scheme requires a control-Z at the end of each source file: 
