#!/usr/local/kalypso
;
; lola
;
; ll parser table generator
;
; the format of the grammar is:
;
; ((non-terminal (symbol symbol "action") (production) (production))
;  (non-terminal (production) (production) (production))
;  )
;

;
; these two dictionaries cache results from (first) and (follow)
; to speed the parser generator
;

(setq first-dictionary (new-dictionary))
(setq follow-dictionary (new-dictionary))

;
; data abstraction
;
; a non terminal is a symbol bound to a list of lists
; a terminal is a symbol not bound to a list
; an action is a string
;

(defun non-terminal? (item)
  (and (symbol? item) 
       (bound? item)
       (list? (eval item))
       )
  )

(defun terminal? (item)
  (or (null? item)
      (and (symbol? item)
	   (not (non-terminal? item))
	   )
      )
  )

(defun action? (item)
  (string? item)
  )

(defun null-production (p)
  (if (nil? p)
      t
   elseif (string? (car p))
      (null-production (cdr p))
   else
      nil
      )
  )

(defun start-symbol? (item) (equal? item start-symbol))

;
; this next section generates the 'first' set for
; a list of symbols
;

;
; generate the first set for a collection of productions
;  this is used to generate the first set for a particular
;  non-terminal usually
;
(defun first-set (productions)
  (if (nil? productions)
      nil
   elseif (null-production (car productions)) then
      (cons nil (first-set (cdr productions)))
   else
      (conc (first (car productions))
	    (first-set (cdr productions))
	    )
      )
  )

(setq first-list nil)

;
; generate the first set for a single symbol
;  This is easy for a terminal -- the result
;  is the item itself.  For non-terminals,
;  the first set ends up being union of the first
;  sets of all the productions that derive the
;  non-terminal
;
; Note that this also checks to see if the grammar
; is left-recursive.  This will succeed because
; a left recursive grammar will always re-reference
; a particular non-terminal when trying to generate
; a first set containing it.
;
(defun first-for-symbol (item)
  (if (member? item first-list)
      (error (strcat "lola: left-recursive grammar for symbol " (sprint item)))
      )
  ((lambda (first-list)
     (if (terminal? item) then
	 (list item)
      elseif (non-terminal? item) then
	 (unique (first-set (eval item)))
	 )
     )
   (cons item first-list))
  )

;
; generate the first list for a production.
;
; the first list is the set of symbols which are legal
; as the first symbols in some possible expansion of the
; production.  The cases are simple:
;
; if the (car production) is a terminal, then obviously
; the only possible first symbol is that terminal
;
; Otherwise, generate the first lists for *all* expansions
; of the non-terminal (car production).  If that list doesn't
; contain an epsilon production 'nil, the we're done.  Otherwise,
; this set must be added to the first set of (cdr production) because
; some of the possible expansions of the production will not have any
; terminals at all from (car production).
;
; Note the crufty use of dictionaries to save old expansion of first
; sets.  This is because both ll and follow call first quite often,
; frequently for the same production
;

(defun first (production)
  (let ((cache (symbol production first-dictionary)) (ret))
    (setq ret
    (if (bound? cache)
	(eval cache)
     else
	(set cache
  	      (if production
      		  (if (action? (car production))
	  	      (first (cdr production))
       		   else
      	  	      (let ((first-first))
	    		(setq first-first (first-for-symbol (car production)))
	    		(if (member? nil first-first)
	    		    (conc (delete 'nil first-first)
				  (first (cdr production))
				  )
			 else
			  first-first
			  )
	    		)
	  	      )
   	       else
      		  '(nil)
      		  )
	      )
	)
    )
    ret
    )
  )
	
;
; extract the string elements from a list (top-level only)
;

(defun clip-out-string (l)
  (if l
      (if (string? (car l))
	  (clip-out-string (cdr l))
       else
	  (cons (car l) (clip-out-string (cdr l)))
	  )
   else
      nil
      )
  )

;
; this section generates the 'follow' set for a non-terminal
;

;
; remove the nil? entries in a list
;
(defun remove-nil (stuff)
  (cond (stuff
	 (cond ((car stuff)
		(cons (car stuff) (remove-nil (cdr stuff)))
		)
	       (t
		(remove-nil (cdr stuff))
		)
	       )
	 )
	(t nil)
	)
  )

;
; generate the follow set of a for an item in a particular
; production which derives a particular non-terminal.
; This is obviously nil if the production does
; not contain the item.  Otherwise, it is the first set
; for the portion of the production which follows the
; item -- if that first set contains nil, then the
; follow set also contains the follow set for the
; non-terminal which is derived by the production
;

(defun follow-in-production (item non-terminal production non-terminals)
  (let ((rest) (firsts))
    (cond ((setq rest (member? item production))
	   (setq rest (cdr rest))
	   (setq firsts (first rest))
	   (cond ((member? nil firsts)
		  (conc (remove-nil firsts)
 		  	(follow non-terminal non-terminals)
			)
		  )
		 (t firsts)
		 )
	   )
	  (t nil)
	  )
    )
  )
    
;
; loop through the productions of a non-terminal adding
; the follow sets for each one.  Note that this will often
; generate duplicate entries -- as possibly many of the
; follow sets for productions will contain the entire follow
; set for the non-terminal
;
(defun follow-in-non-terminal (item non-terminal productions non-terminals)
  (cond (productions
	 (conc (follow-in-production item
 	 			     non-terminal
				     (car productions)
				     non-terminals)
	       (follow-in-non-terminal item
				       non-terminal
				       (cdr productions)
				       non-terminals
				       )
	       )
	 )
	(t nil)
	)
  )

;
; compute the follow set for an item out of a list
; of non-terminals -- this is the real routine which
; follow calls with the extra argument:  non-terminal-list
;
(defun follow-set (item non-terminal-list non-terminals)
  (cond (non-terminal-list
	 (cond ((equal? (car non-terminal-list) item)
		(follow-set item
			    (cdr non-terminal-list)
			    non-terminals
			    )
		)
	       (t
	 	(conc (follow-in-non-terminal item
			       	       	      (car non-terminal-list)
			       	       	      (eval (car non-terminal-list))
				       	      non-terminals
			       	       	      )
	       	      (follow-set item
		       		  (cdr non-terminal-list)
		       		  non-terminals
		       		  )
	       	      )
		)
	       )
	 )
	(t nil)
	)
  )

(setq follow-list nil)

;
; generate the follow set for a particular item in a list
; of non-terminals.  The only special case is for the
; start symbol who's follow set also contains the
; end-token
;
(defun follow (item non-terminals)
  (let ((cache (symbol (list item non-terminals) follow-dictionary)))
    (cond ((bound? cache)
	   (eval cache)
	   )
	  (t
	   (cond ((member? item follow-list)
		  ;
		  ; I don't know if this is a fatal grammar
		  ; error -- Aho, Ullman and Seti do not
		  ; give a very explicit algorithm for
		  ; generating a follow set.
		  ;
		  (fpatom stderr
			  "lola: recursive follow set for symbol "
			  (sprint item)
			  "\n"
			  )
	   	  nil
		  )
		 (t
	   	  (set cache
		       ((lambda (follow-list)
		   	  (unique
		    	   (cond ((start-symbol? item)
			   	  (cons end-token (follow-set item
						       	      non-terminals
						       	      non-terminals
						       	      )
				 	)
			   	  )
			  	 (t
			   	  (follow-set item
				       	      non-terminals
				       	      non-terminals
				       	      )
			   	  )
			  	 )
		    	   )
		   	  )
		 	(cons item follow-list)
		 	)
		       )
		  )
		 )
	   )
	  )
    )
  )

;
; strip out duplicates from a list
;
(defun unique (stuff)
  (if stuff
      (let ((rest (unique (cdr stuff))))
	(if (member? (car stuff) rest)
	    rest
	 else
	    (cons (car stuff) rest)
	    )
	)
      )
  )

;
; this makes an entry in the output list, this is just one
; of many possible formats
;

(defun make-entry (terminal non-terminal production)
  (let ((ret (list (list terminal non-terminal) production)))
;    (patom "one entry is ") (print ret) (terpr) (fflush stdout)
    ret
    )
  )

;
; generate the table entries for a particular production
; this is taken directly from Aho, Ullman and Seti
;
; Note: this function uses dynamic scoping -- both non-terminal
; and non-terminals are expected to have been set by the caller
;
(defun ll-one-production (production)
  (let ((firsts (first production))
	(ret)
	(follows)
	)
;    (patom "first-set for production ") (print production)
;    (patom ":\n\t")
;    (print firsts)
;    (terpr)
    (while firsts
	   ;
	   ; if the first set contains nil then the production
	   ; can derive epsilon -- in this case, the entire
	   ; follow set will drive an epsilon production
	   ;
      	   (if (nil? (car firsts)) then
	       (setq follows (follow non-terminal non-terminals))

;	       (patom "follow-set for production ") (print production)
;	       (patom ":\n\t")
;	       (print follows)
;	       (terpr)
	       (while follows
	       	      (setq ret (cons (make-entry (car follows)
					   	  non-terminal
					   	  production
					   	  )
			       	      ret
			       	      )
		     	    )
	       	      (setq follows (cdr follows))
	       	      )
     	    elseif (terminal? (car firsts))
	       (setq ret (cons (make-entry (car firsts)
				    	   non-terminal
				    	   production
				    	   )
			       ret
			       )
	      	     )
	       )
      	   (setq firsts (cdr firsts))
      	   )
    ret
    )
  )

;
; generate the table entries for all productions of
; a particular non-terminal
;
(defun ll-one-non-terminal (non-terminal productions)
  (if productions
      (conc (ll-one-production (car productions))
	    (ll-one-non-terminal non-terminal (cdr productions))
	    )
   else
      nil
      )
  )

;
; generate the table entries for all the non-terminals
;
(defun ll-non-terminals (non-terminal-list start-symbol)
  (if non-terminal-list
;      (patom "non-terminal ") (print (car non-terminal-list)) (terpr)
      (conc (ll-one-non-terminal (car non-terminal-list)
				 (eval (car non-terminal-list))
				 )
	    (ll-non-terminals (cdr non-terminal-list) start-symbol)
	    )
   else
      nil
      )
  )

;
; convert a grammar in input/output form to
; a list of non-terminals each bound to the associated
; list of productions
;
(defun to-non-terminal-list (grammar)
  (if grammar
      (let ((name (caar grammar)))
	(set name (cdar grammar))
	(cons name (to-non-terminal-list (cdr grammar)))
	)
   else
      nil
      )
  )

;
; verify that a parse table has a unique (token state) -> (production)
; for each (token state) pair
;

(defun verify-unique (parse-table)
  (let ((entries (new-dictionary)))
    (while parse-table
	   (cond ((dictionary-lookup entries (caar parse-table))
		  (fpatom stderr
			  "lola: ambiguous grammar at "
			  (sprint (caaar parse-table) (cadaar parse-table))
			  "\n\t"
			  (sprint (cadar parse-table))
			  "\n\t"
			  (sprint (cadr (dictionary-lookup entries
			  				   (caar parse-table)
							   )
					)
				  )
			  "\n")
		  )
		 (t
		  (dictionary-insert entries
				     (caar parse-table)
				     (car parse-table)
				     )
		  )
		 )
	   (setq parse-table (cdr parse-table))
	   )
    )
  )

;
; produce a parse table for the given grammar
;
(defun ll (grammar start-symbol end-token)
  (let ((non-terminals) (parse-table))
    (setq non-terminals (to-non-terminal-list grammar))
    (setq parse-table (ll-non-terminals non-terminals start-symbol))
    (verify-unique parse-table)
    parse-table
    )
  )

;
; user interface portion
;

;
; store the input tokens in a different dictionary to
; avoid collisions with previously bound names
;

(setq parse-dictionary (new-dictionary))

(setq end-token (symbol "$" parse-dictionary))

(defun ll-file (file-in)
  (setq grammar (fread-dictionary file-in parse-dictionary))
  (setq start-symbol (caar grammar))
  (ll grammar (caar grammar) end-token)
  )

(defun extract-terminals-nonunique (table)
  (if table
      (cons (caaar table) (extract-terminals-nonunique (cdr table)))
   else
      nil
      )
  )

(defun extract-terminals (table)
  (unique (extract-terminals-nonunique table))
  )
	       
(defun cadaar (l) (car (cdr (car (car l)))))

(defun extract-non-terminals-nonunique (table)
  (if table
      (cons (cadaar table) (extract-non-terminals-nonunique (cdr table)))
   else
      nil
      )
  )

(defun extract-non-terminals (table)
  (unique (extract-non-terminals-nonunique table))
  )

(defun print-no-nils (l)
  (cond ((list? l)
	 (patom "(")
	 (while l
		(print-no-nils (car l))
		(setq l (cdr l))
		(cond (l (patom " ")))
		)
	 (patom ")")
	 )
	(t (print l))
	)
  )

(defun print-lists (lists)
  (cond (lists
	 (print-no-nils (car lists))
	 (terpr)
	 (print-lists (cdr lists))
	 )
	)
  )

(defun print-table (table)
  (patom "(\n")
  (print-lists table)
  (patom ")\n")
  )

(setq file-in stdin)

(if argv
    (setq file-in (fopen (car argv) 'r))
    (if (nil? file-in)
	(error (strcat "lola: can't open " (sprint (car argv))))
	)
    )

(setq table (ll-file file-in))
(print (extract-terminals table)) (terpr)
(print (extract-non-terminals table)) (terpr)
(print-table table)
