;\c	    Copyright (C) 1990 Pertti Kellomaki
;\c	 
;\c	 This file is part of Taurus, a parser generator producing Scheme
;\c	 
;\c	 Taurus is free software; you can redistribute it and/or modify
;\c	 it under the terms of the GNU General Public License as published by
;\c	 the Free Software Foundation; either version 1, or (at your option)
;\c	 any later version.
;\c	 
;\c	 Taurus is distributed in the hope that it will be useful,
;\c	 but WITHOUT ANY WARRANTY; without even the implied warranty of
;\c	 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;\c	 GNU General Public License for more details.
;\c	 
;\c	 You should have received a copy of the GNU General Public License
;\c	 along with Taurus; see the file COPYING.  If not, write to
;\c	 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
;
;\node The LL(1) Condition
;\comment  node-name,  next,  previous,  up
;\chapter{The LL(1) Condition}
;
;These procedures form the module
;
(module tll1)

;\node The Meaning of the LL(1) Condition
;\comment  node-name,  next,  previous,  up
;\section{The Meaning of the LL(1) Condition}
;
;The LL(1) condition means that it is possible to derive
;from the grammar a deterministic recursive descent parser that needs
;only one token lookahead. Deterministic here means that in places
;where parsing can take more than one route, the lookahead token always
;determines which route to take.  For a more complete discussion, see
;for example [Waite1984], p.156.  There are two places where
;nondeterminism can occur: alternatives and iterations.
;
;\node Checking the Grammar, , , 
;\comment  node-name,  next,  previous,  up
;\section{Checking the Grammar}
;
;The procedure \code{check-ll1} checks that the grammar satisfies the
;LL(1) condition by starting from the starting nonterminal and
;recursively checking all nonterminals that can be reached. In order to
;prevent looping, the path of visited nonterminals is maintained.
;
;\findex{check-ll1}
(define (check-ll1 grammar)
  ;; Entry point of the LL(1) check: passes the starting rule of GRAMMAR
  ;; to check-ll1-rule together with an empty follow set and an empty
  ;; path of visited nonterminals.
  (let ((start (starting-rule grammar))
        (initial-follow-set '())
        (initial-path '()))
    (check-ll1-rule start initial-follow-set grammar initial-path)))

;The procedure \code{check-ll1-rule} checks each grammar rule for
;nondeterminisms. 
;
;\findex{check-ll1-rule}
(define (check-ll1-rule rule follow-set grammar path )
  ;; Checks the expression that defines RULE.  The name of the rule's
  ;; nonterminal is passed along so that reports can mention it.
  (let ((body (rule-expr rule))
        (name (nonterminal-name (rule-nonterminal rule))))
    (check-ll1-expr body follow-set grammar name path)))
		  


;Any nondeterminisms are reported using \code{report-nondeterminism}:
;
;\findex{report-nondeterminism}
(define (report-nondeterminism nonterminal-name explanation
			       common-symbols expr path)
  ;; Writes a report about one nondeterminism to stderr-port.
  ;;
  ;;   nonterminal-name  name of the rule being checked
  ;;   explanation       one-line description of the problem
  ;;   common-symbols    list of offending symbols; may be empty or #f,
  ;;                     in which case no "Common symbols" line is written
  ;;   expr              the offending expression, shown via expr->list
  ;;   path              visited nonterminals, most recent first

  ;; Displays the elements of TERMINALS separated by ", ", applying
  ;; GET-NAME to each element to obtain what is displayed.
  (define (display-terminal-list terminals get-name)
    (let loop ((terminals terminals))
      (cond ((null? terminals))
	    ((null? (cdr terminals))
	     (display (get-name (car terminals)) stderr-port))
	    (else
	     (display (get-name (car terminals)) stderr-port)
	     (display ", " stderr-port)
	     (loop (cdr terminals))))))

  (display explanation stderr-port)
  (newline stderr-port)
  ;; pair? rather than a bare truth test: in Schemes where the empty
  ;; list counts as true, '() would otherwise produce an empty
  ;; "Common symbols" line.
  (if (pair? common-symbols)
      (begin
	(display "Common symbols " stderr-port)
	(display-terminal-list common-symbols terminal-name)
	(newline stderr-port)))
  (display "Rule " stderr-port)
  (display nonterminal-name stderr-port)
  (display " when reached via " stderr-port)
  ;; The path is kept most-recent-first, so reverse it for display.
  (display-terminal-list (reverse path) (lambda (x) x))
  (newline stderr-port)
  (display "Expression " stderr-port)
  (display (expr->list expr) stderr-port)
  (newline stderr-port)
  (newline stderr-port))

;\node Detecting Common Symbols, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Detecting Common Symbols}
;
;Basically, checking for nondeterminism means looking for common
;symbols in some sets. This is done with \code{have-common-symbols?}.
;The procedure \code{common-symbols} returns the common symbols in sets.
;Each symbol that is a member of at least two of the sets is included
;in common symbols.
;
;\findex{have-common-symbols?}
(define (have-common-symbols? sets)
  ;; Returns a true value if some element of the list SETS is equal?
  ;; to another element of it, and #f otherwise.  Elements are compared
  ;; as wholes: for a flat list of symbols this detects a repeated
  ;; symbol, while for a list of sets it only detects two sets that are
  ;; equal? to each other.
  ;; NOTE(review): callers that pass a list of sets probably expect
  ;; overlapping sets to be detected as well -- confirm.

  ;; First duplicated element of ITEMS, as the tail returned by member,
  ;; or #f when there is none.
  (define (duplicate-in items)
    (and (pair? items)
         (or (member (car items) (cdr items))
             (duplicate-in (cdr items)))))

  ;; The elements are scanned in reverse order of SETS.
  (duplicate-in (reverse sets)))
	     

;\findex{common-symbols}
(define (common-symbols sets)
  ;; Returns the symbols that occur in at least two different members
  ;; of SETS (a list of lists), with duplicates removed by
  ;; remove-duplicates.  Each set is compared only against the sets
  ;; that come after it, so a symbol repeated inside a single set does
  ;; not count.

  ;; True if SYMBOL is a member of any of OTHERS.
  (define (in-any? symbol others)
    (and (pair? others)
         (or (member symbol (car others))
             (in-any? symbol (cdr others)))))

  ;; Conses onto FOUND every element of CURRENT that occurs in OTHERS.
  (define (scan current others found)
    (cond ((null? current) found)
          ((in-any? (car current) others)
           (scan (cdr current) others (cons (car current) found)))
          (else
           (scan (cdr current) others found))))

  (let next-set ((remaining sets)
                 (found '()))
    ;; The last set has nothing after it to compare against.
    (if (or (null? remaining)
            (null? (cdr remaining)))
        (remove-duplicates found)
        (next-set (cdr remaining)
                  (scan (car remaining) (cdr remaining) found)))))

;\node Checking Expressions, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Checking Expressions}
;
;Each type of expression needs its own special handling.
;
;\findex{check-ll1-expr}
(define (check-ll1-expr expr follow-set grammar
			current-nonterminal-name path)
  ;; Dispatches EXPR to the checker for its expression type.
  ;; Terminals, actions and empty need no checking.  Any other kind of
  ;; expression is reported through taurus-error.

  ;; Expressions that involve no choice at all.
  (define (leaf? e)
    (or (terminal? e)
        (action? e)
        (empty? e)))

  (define (iteration? e)
    (or (zero-iteration? e)
        (nonzero-iteration? e)))

  ;; Calls CHECKER with the arguments this procedure received.
  (define (dispatch checker)
    (checker expr follow-set grammar current-nonterminal-name path))

  (cond ((leaf? expr))
        ((nonterminal? expr) (dispatch check-ll1-nonterminal))
        ((sequence? expr)    (dispatch check-ll1-sequence))
        ((alternative? expr) (dispatch check-ll1-alternative))
        ((iteration? expr)   (dispatch check-ll1-iteration))
        (else
         (taurus-error "check-ll1-expr: bad expression " expr))))


;\node Nondeterminism In Sequences, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Nondeterminism In Sequences}
;
;Sequences themselves are always deterministic, because there is no
;choosing involved. Thus, only the elements of a sequence need to be
;checked for nondeterminism. Checking is done from right to left,
;because the \code{follow-set} is accumulated at the same time. If the
;FIRST set of an element includes \code{empty}, the follow set of that
;element "shows through" to the element on its left.
;
;\findex{check-ll1-sequence}
(define (check-ll1-sequence expr follow-set grammar
			    current-nonterminal-name path)
  ;; Checks every element of the sequence EXPR, working from right to
  ;; left so that the follow set of each element can be accumulated on
  ;; the way.  FOLLOW-SET is the follow set of the whole sequence and
  ;; therefore of its last element.
  (let loop ((elements (reverse (sequence-elements expr)))
	     (follow-set follow-set))
    (cond ((not (null? elements))
	   (check-ll1-expr (car elements) follow-set grammar
			   current-nonterminal-name path)
	   ;; Compute the FIRST set of this element once; it is needed
	   ;; both for the empty test and for the new follow set.
	   (let ((element-first (first-set (car elements) grammar)))
	     (loop (cdr elements)
		   ;; If this element can be empty, whatever follows it
		   ;; can also follow the element to its left.  The
		   ;; empty marker itself stays in the resulting list.
		   (if (member (make-empty) element-first)
		       (append element-first follow-set)
		       element-first)))))))
    

;\node Nondeterminism In Alternatives, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Nondeterminism In Alternatives}
;
;If the FIRST sets of the choices in an alternative expression have
;terminal symbols in common, nondeterminism arises. There is no way for the
;parser automaton to decide which alternative to choose. When
;generating the parser, the nondeterminism is reported.
;
;If one of the FIRST sets includes \code{empty}, the \dfn{FOLLOW} set
;{}(the set of terminal symbols that can follow the expression) must also
;be considered, because the symbols in it can also appear in
;the lookahead at that time.
;
;Each alternative is also checked individually.
;
;\findex{check-ll1-alternative}
(define (check-ll1-alternative expr follow-set grammar
			       current-nonterminal-name path)
  ;; Checks the alternative expression EXPR:
  ;;  1. reports terminal symbols shared by the FIRST sets of two or
  ;;     more choices;
  ;;  2. if some choice can be empty, reports terminal symbols shared
  ;;     by FOLLOW-SET and the FIRST sets of the choices;
  ;;  3. checks every choice on its own with the same FOLLOW-SET.
  ;; Returns the list of the results of checking the choices.
  ;;
  ;; The overlap tests use common-symbols, which compares the members
  ;; of the sets, so that FIRST sets which overlap without being
  ;; identical are detected as well.

  ;; SYMBOLS without the empty marker.  The marker is not a lookahead
  ;; symbol, and it can occur both in a FIRST set and in an accumulated
  ;; follow set, so it must not be counted as a conflict by itself.
  ;; NOTE(review): as a consequence, two choices that can both be empty
  ;; but share no terminal are not reported here -- confirm that this
  ;; is acceptable.
  (define (without-empty symbols)
    (let ((empty-marker (make-empty)))
      (let loop ((rest symbols)
		 (kept '()))
	(cond ((null? rest) (reverse kept))
	      ((equal? (car rest) empty-marker)
	       (loop (cdr rest) kept))
	      (else
	       (loop (cdr rest) (cons (car rest) kept)))))))

  (let* ((first-sets (map (lambda (choice)
			    (first-set choice grammar))
			  (alternative-choices expr)))
	 (includes-empty
	  (let loop ((sets first-sets))
	    (cond ((null? sets) #f)
		  ((member (make-empty) (car sets)))
		  (else (loop (cdr sets))))))
	 ;; Terminals that start more than one choice.
	 (common-in-choices
	  (without-empty (common-symbols first-sets)))
	 ;; Terminals of the follow set that also start some choice.
	 ;; Only relevant when a choice can be empty.
	 (common-with-follow
	  (if includes-empty
	      (without-empty
	       (common-symbols
		(list follow-set (apply append first-sets))))
	      '())))
    (if (pair? common-in-choices)
	(report-nondeterminism
	 current-nonterminal-name
	 "Choices of an alternative expression have common starting symbols."
	 common-in-choices
	 expr
	 path)
	#f)
    (if (pair? common-with-follow)
	(report-nondeterminism
	 current-nonterminal-name
	 "The follow set and some of the choices have common starting symbols."
	 common-with-follow
	 expr
	 path)
	#f)
    (map (lambda (expr) (check-ll1-expr expr follow-set grammar
					current-nonterminal-name path))
	 (alternative-choices expr))))

;\node Nondeterminism In Iterations, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Nondeterminism In Iterations}
;
;Nondeterminism in iterations can arise from two separate causes:
;ambiguity in termination of an iteration and iteration of an empty
;expression.
;
;Iteration is analogous to looping, so it can be reduced to self
;recursive nonterminals much the same way loops can be reduced to
;recursive function calls. From the reduced form it is easy to see how
;nondeterminism arises. If we convert
;\begin{example}
;{}(FOO (iter+ EXPR))
;\end{example}
;to
;\begin{example}
;{}(FOO EXPR (alt FOO empty))
;\end{example}
;\noindent
;{}(iterating \code{EXPR} one or more times is converted to
;expecting \code{EXPR} once and then expecting either \code{empty}, ie.
;just leaving the expression, or expecting \code{FOO} again), the
;potential nondeterminism is readily seen. If FIRST set of \code{EXPR} and
;the next expression have terminal symbols in common, nondeterminism arises.
;This leads to the same situation as in the previous section.
;
;Another problem with iterations appears when the iterand can reduce to
;\code{empty}, because the iteration could be continued forever. This
;kind of nondeterminism is easy to find, because it does not depend on
;the context of the expression.
;
;The iterand is also checked.
;
;\findex{check-ll1-iteration}
(define (check-ll1-iteration expr follow-set grammar
			     current-nonterminal-name path)
  ;; Checks the iteration expression EXPR.  The iterand is checked
  ;; first; then at most one problem of the iteration itself is
  ;; reported:
  ;;  - the iterand can be empty, or
  ;;  - the FIRST set of the iterand and FOLLOW-SET share a symbol, so
  ;;    the lookahead cannot tell whether to iterate again or to stop.
  (let ((set (first-set (iteration-iterand expr) grammar)))

    ;; After one round of the iterand there may follow either another
    ;; round or whatever follows the iteration.
    (check-ll1-expr (iteration-iterand expr)
		    (append set follow-set) grammar
		    current-nonterminal-name path)

    (cond ((member (make-empty) set)
	   (report-nondeterminism
	    current-nonterminal-name
	    "Iteration of empty expression."
	    '()
	    expr
	    path))
	  (else
	   ;; Compare the two sets with each other.  Looking for a
	   ;; duplicate in their concatenation would also fire when
	   ;; FOLLOW-SET alone contains a symbol twice, which happens
	   ;; because follow sets are built with plain append.
	   (let ((common (common-symbols (list set follow-set))))
	     (if (pair? common)
		 (report-nondeterminism
		  current-nonterminal-name
		  "Ambiguity in ending iteration."
		  common
		  expr
		  path)))))))

;\node Handling of Nonterminals, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Handling of Nonterminals}
;
;Nondeterminism can arise even if all the expressions in the grammar
;are internally deterministic. This is caused by the interaction of
;rules. Whether an expression is deterministic or not depends on the
;context where it appears, i.e. its FOLLOW set.
;
;The problem is that the FOLLOW set is different in each place
;where the nonterminal appears. The expression defining a nonterminal
;must therefore be checked every time the nonterminal appears in an
;expression. In order to prevent looping, the path is checked. If the
;nonterminal already occurs twice in the path, it is not
;checked any more. We cannot quit after the first occurrence, because
;rules can be recursive.
;
;\findex{check-ll1-nonterminal}
(define (check-ll1-nonterminal expr follow-set grammar
			       current-nonterminal-name path)
  ;; Checks the expression that defines the nonterminal EXPR, using the
  ;; FOLLOW-SET of this particular occurrence.  Nothing is done when
  ;; EXPR already occurs twice or more in PATH (compared with eq?).
  ;; Otherwise the check continues with EXPR added to the front of PATH
  ;; and with the name of EXPR as the current nonterminal name.
  ;; CURRENT-NONTERMINAL-NAME itself is not used here.

  ;; Number of elements of LST that are eq? to ITEM.
  (define (occurrences item lst)
    (let loop ((rest lst)
               (n 0))
      (cond ((null? rest) n)
            ((eq? item (car rest)) (loop (cdr rest) (+ n 1)))
            (else (loop (cdr rest) n)))))

  (if (< (occurrences expr path) 2)
      (check-ll1-expr (rule-expr (rule-for expr grammar))
                      follow-set
                      grammar
                      (nonterminal-name expr)
                      (cons expr path))))
