;;; This is the file LANG.SCM

;;; Language generation and parsing, using AMB

;;; The language in which we are interested can be described by the following
;;; grammar (the notation is called Backus-Naur Form, after John Backus and
;;; Peter Naur -- see footnote).
;;;
;;; <sentence> ::= <noun phrase> <verb phrase>
;;; <noun phrase> ::= <np3> | <noun phrase> <prepositional phrase>
;;; <np3> ::= <article> <np2>
;;; <np2> ::= <np> | <adjective> <np2>
;;; <np>  ::= <noun> | <noun> <np>
;;;
;;; <prepositional phrase> ::= <preposition> <noun phrase>
;;;
;;; <verb phrase> ::= <verb> | <verb> <adverb>
;;;                   | <verb phrase> <prepositional phrase> 
;;;
;;; <noun> ::=  student | professor | dog | cat | class
;;; <verb> ::=  studies | lectures | eats | sleeps
;;; <adjective> ::=  brilliant | tired
;;; <adverb> ::=  quickly | delightedly | slothfully
;;; <article> ::= the | a
;;; <preposition> ::=  with | for | to | in | by
;;;
;;; In this notation, words within pointed brackets are names of grammatical
;;; categories, and the vertical bars separate alternatives.

;;; Let us try to implement this notation in Scheme.

;;; We shall need a way of indicating that a parser has gone down a wrong
;;; track and should back up.  This is done by the procedure call (fail):

(define (fail) (amb))

;;; The kernel of a parser is a procedure that takes as input a list of
;;; symbols, and if successful returns as output a combination of the parsed
;;; structure and the remaining portion of the list that is left to parse.  If
;;; not successful it calls (fail).  The data structure for the output
;;; combination is defined as follows.

(define make-partial-parse cons)

(define parsing       car)
(define rest-sentence cdr)

;;; A parser itself is a parameterless procedure which when called produces
;;; the appropriate parser kernel, which actually does the work.  This extra
;;; layer of wrapping is necessary to control and delay evaluation; it allows
;;; grammatical categories to be defined recursively, and prevents recursively
;;; defined grammars from unwinding themselves for ever.

;;; For parsing a complete piece of input we use the call (parse p l).  This
;;; parses the list l using the parser p, but rejects any parsings that have
;;; not dealt with the complete list; the result is imply the parsing. 

(define (parse p l)
  (let ((pp ((p) l)))
    (cond ((null? (rest-sentence pp)) (parsing pp))
	  (else (fail)))))

;;; ============================

;;; We now implement our grammar notation.  As usual, our notation contains
;;; "primitives", "means of combination" and "means of abstraction".

;;; PRIMITIVES

;;; The "primitives" of our notation will be parsers that accept only one
;;; particular symbol.

(define (prim sym)
  (lambda ()
    (lambda (l)
      (cond ((null? l) (fail))
	    ((eq? (car l) sym) (make-partial-parse sym (cdr l)))
	    (else (fail))))))

;;; Note that other primitives could be added here, for example one which
;;; accepted any single symbol.

;;; MEANS OF COMBINATION

;;; There are two principal "means of combination": the first, seq, takes an
;;; arbitrary number of parsers and produces a parser which attempts to
;;; produce a sequence of parsings, corresponding to the argument list.

(define (seq . parsers) (seqlist parsers))

(define (seqlist parsers)
  (lambda ()
    (lambda (l)
      (cond ((null? parsers) (make-partial-parse '() l))
	    (else (let ((first-item (((car parsers)) l)))
		    (let ((other-items (((seqlist (cdr parsers)))
					(rest-sentence first-item))))
		      (make-partial-parse (cons (parsing first-item)
						(parsing other-items))
					  (rest-sentence other-items)))))))))

;;; The other principal "means of combination", alt, takes an arbitrary number
;;; of parsers and makes a non-deterministic choice between them.  We could
;;; simply use amb for this, but it is better practice to define a new name,
;;; and for this we have to use amblist.

(define (alt . parsers)
  (lambda ()
    ((amblist parsers))))

;;; Note that the extra layer of procedure here prevents the amblist from
;;; being evaluated unless it is needed.  This avoids having the backup
;;; mechanism get lost down never-ending irrelevant blind alleys.

;;; We need one other combiner for the special case
;;;
;;; <something> ::= <base> | <something> <extension1> <extension2>
;;;
;;; because it is left-recursive.  Otherwise, in order to parse a <something>,
;;; we would have to start by parsing a <something>, and this would lead to
;;; infinite descent.  Instead, we parse the <base> first, and then check to
;;; see if there are any <extension>'s to follow: 

(define (lr-alt base . extensions)
  (define (add-extensions pp)
    (amb pp
	 (let ((other-items (((seqlist extensions))
			     (rest-sentence pp))))
	   (add-extensions (make-partial-parse (cons (parsing pp)
						     (parsing other-items))
					       (rest-sentence other-items))))))
  (lambda ()
    (lambda (l)
      (add-extensions ((base) l)))))

;;; MEANS OF ABSTRACTION

;;; The principal "means of abstraction" is simply define, as usual; but it is
;;; also convenient to have a way of attaching the name of our abstractions to
;;; the parsings produced.  (named n p) produces a parser that behaves just
;;; like the parser p except that the name n is attached to the parsing
;;; produced.

(define (named n p)
  (lambda ()
    (lambda (l)
      (let ((pp ((p) l)))
	(make-partial-parse (list n (parsing pp))
			    (rest-sentence pp))))))

;;; ============================

;;; EXAMPLE: PALINDROMES

;;; Let us check all this out with palindromes, defined as follows:
;;;
;;; <palindrome> ::= a | b | c | a a | b b | c c |
;;;                  a <palindrome> a | b <palindrome> b | c <palindrome> c

(define a (prim 'a))
(define b (prim 'b))
(define c (prim 'c))

(define (palindrome)
  ((alt a
	b
	c
	(seq a a)
	(seq b b)
	(seq c c)
	(seq a palindrome a)
	(seq b palindrome b)
	(seq c palindrome c))))

;;; Note that palindrome is defined as a function, and has a corresponding
;;; extra pair of parentheses around its body.  This allows the recursion in
;;; the definition to work without undefinedness.  The other definitions (a, b
;;; and c) could have been treated likewise without harm, but that would have
;;; been unnecessary in those cases.

;;; ============================

;;; THE GRAMMAR OF ENGLISH SENTENCES

;;; We now give the Scheme version of the grammar appearing at the beginning
;;; of this file.  Note that to avoid unnecessary clutter we "name" only the
;;; principal grammatical categories.

;;; Perhaps the next few definitions should really have been written in the
;;; following form:
;;;
;;; (define (noun) ((named 'noun
;;; 		           (alt (prim 'student)
;;; 			        (prim 'professor)
;;; 			        (prim 'dog)
;;; 			        (prim 'cat)
;;; 			        (prim 'class)))))
;;;
;;; but instead we streamline just a little.

(define (noun) ((named 'noun (prim (amb 'student 'professor 'dog 'cat 'class)))))

(define (verb) ((named 'verb (prim (amb 'studies 'lectures 'eats 'sleeps)))))

(define (adjective) ((named 'adjective (prim (amb 'brilliant 'tired)))))

(define (adverb) ((named 'adverb (prim (amb 'quickly 'delightedly 'slothfully)))))

(define (article) ((named 'article (prim (amb 'the 'a)))))

(define (preposition) ((named 'preposition (prim (amb 'with 'for 'to 'in 'by)))))

;;;

(define (np) ((alt noun (seq noun np))))

(define (np2) ((alt np (seq adjective np2))))

(define np3 (seq article np2))

(define (noun-phrase) ((named 'noun-phrase
			      (lr-alt np3 prepositional-phrase))))

;;;

(define prepositional-phrase (named 'prepositional-phrase
				    (seq preposition noun-phrase)))
;;;

(define (verb-phrase)
  ((named 'verb-phrase
	  (lr-alt (alt verb (seq verb adverb))
		  prepositional-phrase))))

;;;

(define sentence (named 'sentence (seq noun-phrase verb-phrase)))

;;; ============================

;;; A SLIGHTLY SIMPLER GRAMMAR

(define (np2-b) ((alt noun (seq adjective noun))))

(define np3-b (seq article np2-b))

(define (noun-phrase-b) ((named 'noun-phrase
			      (alt np3-b (seq np3-b prepositional-phrase-b)))))

(define prepositional-phrase-b (named 'prepositional-phrase
				    (seq preposition np3-b)))
(define vp-b (alt verb (seq verb adverb)))

(define (verb-phrase-b)
  ((named 'verb-phrase (alt vp-b (seq vp-b prepositional-phrase-b)))))

(define sentence-b (named 'sentence (seq noun-phrase-b verb-phrase-b)))

;;; ============================

;;; Footnote: 
;;;
;;; John Backus is a Californian IBM-er, who was one of the inventors of
;;; FORTRAN, and has more recently been working on functional programming.
;;; Peter Naur is the Grand Old Man of Danish computer science.  They invented
;;; Backus-Naur form when they were both part of the team inventing the
;;; programming language Algol 60.
