;;; process grammars

;;; interface for translating sps grammars into form suitable for
;;; LL parser-builder

;;; top-level entry:  define-grammar

(extend-syntax (define-grammar)
  [(define-grammar grammar-name start-symbol
     (prod-name (lhs rhs) vars action)
     ...) 
   (with ((ans
	    (and*
	      (check-grammar
		'((prod-name (lhs rhs) vars action) ...)))))
     (if 'ans
       (let
	 ((scanner (make-scanner '(rhs ...)))
	  (parser (make-parser
		   'start-symbol
		   (list		; production in form for LL-builder
		     (list 'lhs
			   (translate-rhs 'rhs)
			   (lambda vars action))
		     ...))))
	 (begin
	   (set! scanner-2 scanner)
	   (set! grammar-name
	     (lambda (string)
	       '(printf "input string: ~s~%" string)
	       (let ((item-list (scanner string)))
		 '(printf "item-list: ~s~%" item-list)
		 (parser item-list))))
	   (declare-unchecked grammar-name () (-> (seq string) start-symbol))
	   (printf "grammar ~s checked~%" 'grammar-name)))))])

(extend-syntax (define-unchecked-grammar)
  [(define-unchecked-grammar grammar-name start-symbol
     (prod-name (lhs rhs) vars action)
     ...)
   (let
	 ((scanner (make-scanner '(rhs ...)))
	  (parser (make-parser
		   'start-symbol
		   (list		; production in form for LL-builder
		     (list 'lhs
			   (translate-rhs 'rhs)
			   (lambda vars action))
		     ...))))
	 (begin
	   (set! scanner-2 scanner)
	   (set! grammar-name
	     (lambda (string)
	       '(printf "input string: ~s~%" string)
	       (let ((item-list (scanner string)))
		 '(printf "item-list: ~s~%" item-list)
		 (parser item-list))))
	   'grammar-name))])

(define make-scanner
  (lambda (rhss)
    (let*
      ((keywords (collect-keywords rhss))
       (cook-symbol
	 (lambda (chars)
	   (let ([sym (char-list->symbol chars)])
	     (if (memq sym keywords)
	       (make-lexical-item sym '())
	       (make-lexical-item 'symbol sym)))))
       (automaton
	 `((initial ,lex-error
	     ((#\space whitespace)
	      (#\tab  whitespace)
	      (#\newline whitespace)
	      ((#\a #\z) symbol)
	      ((#\A #\Z) symbol)
	      (#\, comma)
	      ((#\* #\/) symbol)	; except "," 
	      (#\= symbol)
	      (#\( lparen)
	      (#\) rparen)
	      (#\; semicolon)
	      ((#\0 #\9) number)))
	   (symbol ,cook-symbol
	     (((#\a #\z) symbol)
	      ((#\A #\Z) symbol)
	      ((#\0 #\9) symbol)
	      (#\* symbol)
	      (#\+ symbol)
	      (#\- symbol)
	      (#\/ symbol)
	      (#\! symbol)
	      (#\= symbol)))
	   (whitespace ,cook-bogus
	     ((#\space whitespace)
	      (#\tab whitespace)
	      (#\newline whitespace)))
	   (number ,cook-number
	     (((#\0 #\9) number)))
	   (comma ,(cook-token 'comma) ())
	   (lparen ,(cook-token 'lparen) ())
	   (rparen ,(cook-token 'rparen) ())
	   (semicolon ,(cook-token 'semicolon) ())
	   (end-marker ,(cook-token 'end-marker) ()))))
      (set! automaton-2 automaton)
      (lambda (string) (scan-string automaton string)))))

(define collect-keywords
  (lambda (rhss)
    (if (null? rhss) '()
      (let loop
	((rhs (car rhss))
	 (keywords (collect-keywords (cdr rhss))))
	'(printf "collect-keywords-loop: rhs = ~s keywords = ~s rhss = ~s~%"
	  rhs keywords rhss)
	(cond
	  ((null? rhs) keywords)
	  ((symbol? (car rhs))		; it's a non-terminal
	   (loop (cdr rhs) keywords))
	  ((eq? (caar rhs) 'quote)	; it's a terminal symbol
	   '(printf "looking at terminal symbol ~s~%" (car rhs))
	   (loop (cdr rhs)
		 (if
		   (or (memq (cadar rhs) '(lparen rparen comma semicolon))
		       (memq (cadar rhs) keywords))
		   keywords
		   (cons (cadar rhs) keywords))))
	  (else (report-error
		  (list "unknown grammar item" (car rhs)))))))))

;;; ****************************************************************

;;; Translation to LL parsing system

;;; production ::= (prod-name (lhs rhs) action)  
;;; action ::= a closure (of several arguments)

(define make-parser
  (lambda (start-symbol productions)
    (let ((table (build-LL-table start-symbol productions)))
      (set! table-2 table)
      (lambda (item-list)
	(let ([result (parse-nt table start-symbol item-list)])
	  (if (eq? (lexical-item->class
		     (car (parser-result->unused result)))
		   'end-marker)
	    (parser-result->tree result)
	    (error 'parse-top-level
	      "symbols left over: ~s"
	      (car (parser-result->unused result)))))))))

(define translate-rhs
  (lambda (rhs)
    (map translate-rhs-item rhs)))

(define translate-rhs-item
  (lambda (item)
    (cond
      ((memq item '(int symbol))	; it's one of the data-items
       `(data-item ,item))
      ((symbol? item) `(non-terminal ,item))
      ((eq? (car item) 'quote)		; it's a token
       `(token ,(cadr item)))
      (else (report-error
	      (list "unknown grammar item" item))))))

;;; ****************************************************************

;;; Tests

'(define-unchecked-grammar sdt2 pgm
  (pgm
    (pgm (exp))
    (exp)
    `(pgm ,exp))
  (static
    (exp (symbol))
    (lit)
    `(ident ,lit))
  (if
    (exp ('if exp exp exp))
    (exp0 exp1 exp2)
    `(cond ,exp0 ,exp1 ,exp2))
  (appl
    (exp ('lparen exp exp 'rparen))
    (e0 e1)
    `(appl ,e0 ,e1)))

;;; ****************************************************************

;;; Interface to typechecker

(define check-grammar
  (lambda (productions)
    (map
      (lambda (production)
	(call/cc (lambda (next)
		   (check-production production
		     (lambda (msg)
		       (printf "~s:~%" (car production))
		       (pretty-print msg)
		       (next #f)))
		   (printf "~s~%" (car production))
		   #t)))
      productions)))

(define check-production
   (lambda (production fail)
      (let
	 ((prod-name (gen-fcn-sym))
	  (type-info (filter-type-info (cadr (cadr production))))
					; get type info for rhs
	  (vars (caddr production))
	  (term (cadddr production)))
	; (printf "check-production: production = ~s~%" production)
	; (printf "prod-name = ~s type-info = ~s vars = ~s term = ~s~%"
        ;	   prod-name type-info vars term)
	 '(add-undefined-types-to-badlist (cons 'dummy type-info) nil)
	 (checker
	    (ext-prefix vars type-info
	       (ext-prefix (list prod-name)
		 (list
		   (make-type-scheme
		   '()
		   (make-functional-type
		     (list (car (cadr production))) ; lhs nonterminal
						    ; is also a type.
		     (gen-type-var))))
		 empty-prefix))
	    (make-application prod-name (list term))
	    empty-prefix
	    fail))))

(define filter-type-info
  (lambda (l)
    (cond
      ((null? l) nil)
      ((and
	 (not (atom? (car l)))
	 (eq? (caar l) 'quote))
       (filter-type-info (cdr l)))
      (t (cons
	   (make-type-scheme
	     '()
	     (car l))
	   (filter-type-info (cdr l)))))))

(define check-production1
  (lambda (production)
    (let ((ans (check-production production report-error)))
      (pretty-print (expand-type-exp (car ans) (cadr ans))))))

(define and*
  (lambda (l)
    (cond
      ((null? l) #t)
      ((car l) (and* (cdr l)))
      (else #f))))

;;; ****************************************************************

;;; A test tag, currently commented out:

'(define-type-abbrev Num int)
'(define-type-abbrev Digit int)

'(define-grammar sdt3 Num
  (zero
    (Digit ('a))
    ()
    0)
  (one
    (Digit ('b))
    ()
    1)
  (empty-number
    (Num ())
    ()
    0)
  (non-empty-number
    (Num (Digit Num))
    (x y)
    (+ (* 2 y) x)))
    
