/*  PARSE_UTILS.PL  */


:- module parse_utils.


/*
Exported predicates. DCG non-terminals are listed with the arity they
have after grammar-rule translation, i.e. two more than the number of
arguments they are written with inside a rule. 'bal' takes no arguments
in a rule, so it is exported as bal/2.
*/
:- public lookahead/3,
          integer/3,
          real/3,
          number/3,
          atom/3,
          rem/2,
          rem/3,
          lookahead_rem/2,
          lookahead_rem/3,
          arb/2,
          arb/3,
          arbno/3,
          arbno_and_count/4,
          break/4,
          bal/2.


/*
SPECIFICATION
-------------

This module exports some miscellaneous DCG predicates. Many of them
imitate, and are named after, Snobol patterns: arb, rem, arbno, break,
bal. See the reference in the implementation section, or any Snobol
textbook, for further details.

In the Tutor, these are used mainly for pattern-matching erroneous
sentences, to try and make a diagnosis.


PUBLIC DCG lookahead( W? ):

Matches the next terminal symbol, unifying it with W. Does _not_ consume
the input.


PUBLIC DCG integer( I? ):
           real( R? ):
           number( N? ):
           atom( A? ):

Match the next terminal symbol if it is of the appropriate type and
unify it with the argument.


PUBLIC DCG rem:
           rem( L? ):

Match the rest of the input. The one-argument form unifies it with L.


PUBLIC DCG lookahead_rem:
           lookahead_rem( L? ):

As for rem, but don't consume any input.

Example:
    lookahead_rem( L ), rule.
Unifies L with all the remaining terminal symbols, and then tries 'rule'
as though the lookahead_rem were not there.                    


PUBLIC DCG arb:
           arb( L? ):

Match an arbitrary number of terminal symbols. The one-argument form
unifies them with L.

This starts off by matching the null list, and matches lists of
successively increasing length on backtracking. Hence it can be used as
a don't-care segment in rules like
    two_aces --> [ace], arb, [ace].
    delimited_segment( Delimiter, Segment ) -->
        [Delimiter], arb(Segment), [Delimiter].


PUBLIC DCG arbno( R+ ):

Matches an arbitrary number of R's. R must be a non-terminal of arity 2,
i.e. one which, when occurring in a DCG rule, has no arguments.

Example:
    integer --> integer(_).
    integers --> arbno( integer ).


PUBLIC DCG arbno_and_count( R+, N- ):

Matches an arbitrary number of R's, as for arbno, and unifies the number
of them with N.

Example:
    integer --> integer(_).
    integers -->
        arbno_and_count( integer, N ),
        { write(N), write( ' integers found') }.                    


PUBLIC DCG break( Del+, L? ):

Matches everything up to and including the next occurrence of Del,
unifying the terminal symbols before but not including Del with L. Del
itself is consumed but is not part of L. Del must be a terminal symbol,
not (as for arbno) a non-terminal.

Example:
    phrase( break(x,L), [1,2,3,x] )
sets L to [1,2,3].


PUBLIC DCG bal:

Matches a non-empty sequence of terminal symbols which is balanced in
the brackets () [] and {}.

Example:
    Matches [ '(', '{', a, '}', ')', '[', ']' ]
    Does not match [ '(', '{', a, ')', '}', '[', ']' ]
*/


/*
IMPLEMENTATION
--------------

Some of these are patterned after Snobol patterns. The implementation of
'bal' in particular was guided by "Algorithms in Snobol 4" by James
Gimpel. This is an excellent reference for ideas for string pattern
matching and its implementation.
*/


:- needs real / 1.


/*
lookahead( W? ), written out as the translated clause lookahead/3.
Succeeds when the input starts with a symbol unifying with W. The
output list is the same as the input list, so nothing is consumed.
*/
lookahead( W, Input, Input ) :-
    Input = [W|_].


/*
integer( I? ), written out as the translated clause integer/3.
Consumes one terminal symbol and unifies it with I, provided the
built-in type test integer/1 accepts it.
*/
integer( I, [I|Rest], Rest ) :-
    integer( I ).


/*
real( R? ), written out as the translated clause real/3.
Consumes one terminal symbol and unifies it with R, provided real/1
accepts it. real/1 is not defined in this file; it is requested from
elsewhere by the 'needs' directive.
*/
real( R, [R|Rest], Rest ) :-
    real( R ).


/*
number( N? ), written out as the translated clause number/3.
Consumes one terminal symbol and unifies it with N, provided it passes
integer/1 or, failing that, real/1. The integer test is tried first.
*/
number( N, [N|Rest], Rest ) :-
    (   integer( N )
    ;   real( N )
    ).


/*
atom( A? ), written out as the translated clause atom/3.
Consumes one terminal symbol and unifies it with A, provided the
built-in type test atom/1 accepts it.
*/
atom( A, [A|Rest], Rest ) :-
    atom( A ).


/*
rem( L? ) and rem, written out as translated clauses.
rem/3 unifies L with the whole of the remaining input and leaves
nothing behind. rem/2 does the same without reporting what was matched.
*/
rem( L, Input, Rest ) :-
    L = Input,
    Rest = [].

rem( _, Rest ) :-
    Rest = [].


/*
lookahead_rem( L? ) and lookahead_rem, written out as translated
clauses. lookahead_rem/3 unifies L with the whole of the remaining
input and hands that same input back unconsumed. lookahead_rem/2 just
hands the input back unchanged.
*/
lookahead_rem( L, Input, Rest ) :-
    L = Input,
    Rest = Input.

lookahead_rem( Input, Rest ) :-
    Rest = Input.


/*
arb, written out as translated clauses.
The first solution consumes nothing; each further solution found on
backtracking consumes one more terminal symbol than the previous one.
*/
arb( In, In ).
arb( [_|In0], In ) :-
    arb( In0, In ).


/*
arb( L? ), written out as translated clauses.
As for arb/2, but the symbols consumed so far are collected, in order,
in the first argument. The shortest match (the empty list) comes first.
*/
arb( [], In, In ).
arb( [Sym|Syms], [Sym|In0], In ) :-
    arb( Syms, In0, In ).


/*
arbno( R+ ), written out as translated clauses arbno( R, In0, In ).

The first clause matches zero occurrences of R. The second builds the
goal R( In0, In1 ) with functor/3 and arg/3, calls it, and then recurses
on what is left. functor/3 is called with arity 2 and R as the name, so
R is expected to be an atom.

Each repetition must actually consume input: a solution of R that
leaves the input untouched (In1 identical to In0) is rejected. Without
this check, an R that can match the empty sequence would make arbno
recurse forever without making progress. Discarding such repetitions
does not lose any answers, because a null match leaves the input exactly
as it was.
*/
arbno( _, In, In ).
arbno( R, In0, In ) :-
    functor( Goal, R, 2 ),
    arg( 1, Goal, In0 ),
    arg( 2, Goal, In1 ),
    call( Goal ),
    In1 \== In0,
    arbno( R, In1, In ).


/*
arbno_and_count( R+, N- ), written out as translated clauses.

arbno_and_count/4 is the entry point: it starts the counter at 0 and
hands over to arbno_and_count/5, which carries the running count N0 and
unifies the final count with N when it stops.

As in arbno, the goal R( In0, In1 ) is built with functor/3 and arg/3,
so R is expected to be an atom.

Only repetitions that consume input are accepted and counted: a
solution of R that leaves the input untouched (In1 identical to In0) is
rejected. Without this check, an R that can match the empty sequence
would recurse forever, counting ever more null matches.
*/
arbno_and_count( R, N, In0, In ) :-
    arbno_and_count( R, 0, N, In0, In ).


arbno_and_count( _, N, N, In, In ).
arbno_and_count( R, N0, N, In0, In ) :-
    functor( Goal, R, 2 ),
    arg( 1, Goal, In0 ),
    arg( 2, Goal, In1 ),
    call( Goal ),
    In1 \== In0,
    N1 is N0 + 1,
    arbno_and_count( R, N1, N, In1, In ).


/*
break( Del+, L? ):

Consumes terminal symbols up to and including the first one that
unifies with Del. L is unified with the symbols that came before it.
Fails if the input runs out before Del is found.

The result [] is unified only after the cut. If it were unified in the
clause head, a caller supplying an already-bound L that is not [] would
skip this clause and carry on past the first Del, so that Del would end
up inside L.
*/
break( Del, L ) --> [Del], !, { L = [] }.
break( Del, [H|T] ) --> [H], break( Del, T ).


/*
gbal, written out as translated clauses. Matches one balanced unit:

  - an opening bracket, then either a balanced sequence (bal) or
    nothing, then the closing bracket of the same kind; or
  - a single terminal symbol that is not a bracket.

Kind is the opening bracket character. left_bracket and right_bracket
share it, so that the closing bracket has to pair with the opening one.
*/
gbal( In0, In ) :-
    left_bracket( Kind, In0, In1 ),
    (   bal( In1, In2 )
    ;   In2 = In1
    ),
    right_bracket( Kind, In2, In ).
gbal( [Sym|In], In ) :-
    not( is_bracket(Sym) ).


/*
bal, written out as a translated clause. Matches one balanced unit
(gbal) followed by any number of further units (gbals). Because at
least one unit is required, the empty sequence is never matched.
*/
bal( In0, In ) :-
    gbal( In0, In1 ),
    gbals( In1, In ).


/*
gbals, written out as translated clauses. Matches zero or more balanced
units (gbal). The empty match is offered first; longer matches are
found on backtracking.
*/
gbals( In, In ).
gbals( In0, In ) :-
    gbal( In0, In1 ),
    gbals( In1, In ).


/*
is_bracket( Sym ): Sym is one of the six bracket characters. The three
opening brackets are enumerated first, then the three closing ones.
*/
is_bracket( Sym ) :-
    (   Sym = '('
    ;   Sym = '['
    ;   Sym = '{'
    ;   Sym = ')'
    ;   Sym = ']'
    ;   Sym = '}'
    ).


/*
left_bracket( Kind ), written out as translated clauses. Consumes one
opening bracket and unifies Kind with that bracket character.
*/
left_bracket( '(', [ '(' | In ], In ).
left_bracket( '[', [ '[' | In ], In ).
left_bracket( '{', [ '{' | In ], In ).


/*
right_bracket( Kind ), written out as translated clauses. Consumes one
closing bracket. Kind is the matching OPENING bracket character, so it
can be shared with left_bracket to pair the two up.
*/
right_bracket( '(', [ ')' | In ], In ).
right_bracket( '[', [ ']' | In ], In ).
right_bracket( '{', [ '}' | In ], In ).


:- endmodule.
