/*  TESTS.PL  */
/*  Shelved on the 3rd of October 1988  */


%   Instructions and examples for the C Prolog theorem prover
%
%              March 19, 1987   
%
% The top level call is try(File) where File has clauses converted to
% Horn clauses as follows:
%
%   The clauses p or q, p or (not q), (not p) or q, (not p) or (not q)
%   would be expressed as
%
%   p :- not(q).
%   p :- q.
%   q :- p.
%   false :- p, q.
%
% A clause may also be expressed as p :-- q, r et cetera, with :-- in
% place of :-  .  This means that such clauses are not counted in the
% size of the proof.  Clauses should be expressed using :-- if it is
% known that such clauses will not lead to an infinite loop, and if
% one is fairly sure they should be used whenever possible, i.e., if they
% constitute part of the definition of a predicate.
%
% A clause may be expressed using :-. (notice the period at the end)
% instead of :- or :-- if it is
% desired to give this clause a weight of one.  This is a way of
% favoring the use of a clause without the risk of an infinite loop
% which may occur with :-- if it is used improperly.
%
% An assertion of the form replace(<expr1>, <expr2>). in the input
% signifies that all subgoals of form <expr1> should be replaced by
% subgoals of the form <expr2> before attempting to solve them.  This
% is like a rewrite applied at the ``top level.''  This is sound
% if <expr1> :- <expr2> is valid.
%
% An assertion of the form rewrite(<expr1>, <expr2>). in the input
% signifies that all subexpressions of form <expr1> should be replaced by
% subexpressions of the form <expr2>.  This is like a rewrite applied
% anywhere, not just at the top level.  This is sound if the logical
% equivalence <expr1> == <expr2> is valid, or, if the expressions are
% terms, if the equation <expr1> = <expr2> is valid.
%
% New clauses of the form (s = t) will be converted into rewrite
% rules as specified by interaction with the user.
%
% A subgoal of the form prolog(<expr>) passes <expr> to Prolog to
% evaluate.  This permits a convenient interface with Prolog source code.
% Such subgoals should not be negated.  Example:
%   lt(X,Y) :- prolog(X < Y).
% This also permits logical variables to be instantiated, as,
%   zero(X) :- prolog(X = 0).
%
% A subgoal of the form (L :- M) is a directive to the prover to assume
% M and try to prove L.  For example, P :- (L :- M) means that P is
% true if, when assuming M, we can prove L.  If M is negative, it
% may help to assert nosplits (see below).
%
% An assertion nosave(X) in Prolog means that goals of form X will not
% be cached, nor will splitting be done on them, nor will certain other
% work be done.  This should be done if goals of the form X are very
% easy to solve and it does not pay to avoid repeated work or do
% case analysis on them.  Note -- this may lead to incompleteness
% (that is, no proof found even when one exists) if used improperly.
% It may significantly reduce computation time if used correctly.
% To save nothing except goals of the form false :- X, assert(nosave).
% This permits faster inferences but sometimes many more of them.
%
% An assertion of nosplits inhibits all case analysis.
%
% To do only forward chaining, no backward chaining, assert(f_only).
% This may be incomplete.  To do only back chaining, assert(b_only).
%
% To avoid size computations of subgoals and solutions, assert(nosize).
% This can make the prover significantly faster.
%
% To bound the maximum size of a literal in a proof, assert(maxsize).
% This often helps very much.
% 
% There are two constants that influence how much is charged for a
% lemma that has already been proved.  They are solution_size_multiplier
% and proof_size_multiplier.  Default values are 0.1 and 0.4.
% The former weights the size of the solution and the latter
% weights the length of the proof used to derive it.  To
% set them type solution_size_multiplier(M) and proof_size_multiplier(N).
% Good values to try are (0.125, 0.0) and (0, 1.0).
%
% Suggested allocations are -g 1000 -h 1000 -t 700
%
% Here are some examples for the simplified problem reduction format
% prover.  Most times are on a SUN 3.  Each example
% should be in a separate file when the prover is used.
% Note -- times and numbers of inferences may vary from those given here.
%
%                   Dave Plaisted
%                   UNC Chapel Hill
%                   December 31, 1986

% ****************************************************************************
% Chang and Lee Example 1
p(g(X,Y),X,Y).
p(X,h(X,Y),Y).
false :- p(k(X),X,k(X)).
p(U,Z,W) :- p(X,Y,U), p(Y,Z,V), p(X,V,W).
p(X,V,W) :- p(X,Y,U), p(Y,Z,V), p(U,Z,W).

% takes about 1 second.  If assert(nosave(_)) first, takes .3 seconds.

% ****************************************************************************
% Chang and Lee Example 2
p(e,X,X).
p(X,e,X).
p(X,X,e).
p(a,b,c).
false :- p(b,a,c).
p(U,Z,W) :- p(X,Y,U), p(Y,Z,V), p(X,V,W).
p(X,V,W) :- p(X,Y,U), p(Y,Z,V), p(U,Z,W).

% takes about 8 seconds, 123 inferences

% ****************************************************************************
% Chang and Lee Example 3
false :- p(a,e,a).
p(e,X,X).
p(i(X),X,e).
p(U,Z,W) :- p(X,Y,U), p(Y,Z,V), p(X,V,W).
p(X,V,W) :- p(X,Y,U), p(Y,Z,V), p(U,Z,W).

% takes about 3 seconds, 29 inferences

% ****************************************************************************
% Chang and Lee Example 4
p(e,X,X).
p(i(X),X,e).
false :- p(a,X,e).
p(U,Z,W) :- p(X,Y,U), p(Y,Z,V), p(X,V,W).
p(X,V,W) :- p(X,Y,U), p(Y,Z,V), p(U,Z,W).

% ****************************************************************************
% Chang and Lee Example 5
s(a).
p(e,X,X).
p(X,e,X).
p(X,i(X),e).
p(i(X),X,e).
false :- s(e).
s(Z) :- p(X,i(Y),Z),s(X),s(Y).
p(U,Z,W) :- p(X,Y,U), p(Y,Z,V), p(X,V,W).
p(X,V,W) :- p(X,Y,U), p(Y,Z,V), p(U,Z,W).

% ****************************************************************************
% Chang and Lee Example 6
s(b).
p(e,X,X).
p(X,e,X).
p(i(X),X,e).
p(X,i(X),e).
false :- s(i(b)).
s(Z) :- p(X,i(Y),Z),s(X),s(Y).
p(U,Z,W) :- p(X,Y,U), p(Y,Z,V), p(X,V,W).
p(X,V,W) :- p(X,Y,U), p(Y,Z,V), p(U,Z,W).

% ****************************************************************************
% Chang and Lee Example 7
p(a).
m(a,s(c),s(b)).
m(X,X,s(X)).
false :- d(a,b).
m(Y,X,Z) :- m(X,Y,Z).
d(X,Z) :- m(X,Y,Z).
d(X,Y) :- p(X),not(d(X,Z)),m(Y,Z,U),d(X,U).

% takes about 2.6 seconds, 16 inferencees.  With assert(nosave),
% takes about .5 seconds, 7 inferences.
% with assert(b_only), takes 1.6 seconds, 5 inferences (old value)

% ****************************************************************************
% Chang and Lee Example 8
l(1,a).
d(X,X).
false :- d(X,a),p(X).
d(X,Z) :- d(Y,Z),d(X,Y).
p(f(X)) :- l(1,X),l(X,a).
d(f(X),X) :- l(1,X),l(X,a).
d(g(X),X) :- not(p(X)).
l(1,g(X)) :- not(p(X)).
l(g(X),X) :- not(p(X)).

% takes about 8.5 seconds, 62 inferences
% with assert(f_only) takes 8 seconds, 73 inferences

% ****************************************************************************
% Chang and Lee Example 9
% For this one the way the last three rules was expressed, was important
% for efficiency
l(X,f(X)).
false :- l(X,X).
false :- l(X,Y),l(Y,X).
l(Y,X) :- d(X,f(Y)).
l(f(a),X) :- l(a,X),p(X).
d(h(X),X) :- not(p(X)).
p(h(X)) :- not(p(X)).
l(h(X),X) :- not(p(X)).

% takes about 8.9 seconds, 38 inferences

% ****************************************************************************
% A verification condition for Hoare's FIND program
lt(j,i).
le(m,p).
le(p,q).
le(q,n).
false :- le(b(p),b(q)).
le(X,Y) :- not(lt(Y,X)).
le(b(X),b(Y)) :- le(m,X),le(Y,j),le(X,Y).
le(b(X),b(Y)) :- le(i,X),le(Y,n),le(X,Y).
le(b(X),b(Y)) :- le(m,X),lt(X,i),lt(j,Y),le(Y,n).

% takes about 6.1 seconds, 20 inferences

% ****************************************************************************
% A conditional planning problem in situation calculus representation
% (I composed this one)
at(f,s0).
cold(S) :- not(warm(T)).
at(b,walk(b,S)) :- at(a,S).
at(b,drive(b,S)) :- at(a,S).
at(a,walk(a,S)) :- at(b,S).
at(a,drive(a,S)) :- at(b,S).
at(c,skate(c,S)) :- cold(S),at(b,S).
at(b,skate(b,S)) :- cold(S),at(c,S).
at(d,climb(d,S)) :- warm(S),at(b,S).
at(b,climb(b,S)) :- warm(S),at(d,S).
at(d,go(d,S)) :- at(c,S).
at(c,go(c,S)) :- at(d,S).
at(e,go(e,S)) :- at(c,S).
at(c,go(c,S)) :- at(e,S).
at(f,go(f,S)) :- at(d,S).
at(d,go(d,S)) :- at(f,S).
false :- at(a,S).

% takes about 9.8 seconds, 34 inferences.
% when assert(nosave(at(_,_))) and assert(nosave(not(_)))
% is done first, proof in 5.3 seconds, 22 inferences
% With assert(b_only), takes 4.1 seconds, 13 inferences (old value)

% ****************************************************************************
% Commutativity of join (set intersection); el is set membership
not(el(X,p(Y))) :- not(sub(X,Y)).
el(X,p(Y)) :- sub(X,Y).
not(el(X,join(Y,Z))) :- not(el(X,Y)).
el(X,join(Y,Z)) :- el(X,Y),el(X,Z).
el(X,join(Y,Z)) :- el(X,Z),el(X,Y).
false :- eq(join(a,b),join(b,a)).

% rewrites and replacements.  The prover does a kind of outermost
% rewriting;  the notsub rule is designed to insure that the
% sub(X,Y) replacement before it is only done in a montone
% context, that is, no enclosing negations

rewrite(eq(X,Y),and(sub(X,Y),sub(Y,X))).
rewrite(sub(X,Y),or(not(el(g(X,Y),X)),el(g(X,Y),Y))).
rewrite(not(sub(X,Y)),notsub(X,Y)).
replace(notsub(X,Y),and(el(Z,X),not(el(Z,Y)))).
rewrite(el(X,p(Y)),sub(X,Y)).
rewrite(el(X,join(Y,Z)),and(el(X,Y),el(X,Z))).
rewrite(not(eq(X,Y)),or(not(sub(X,Y)),not(sub(Y,X)))).
rewrite(not(el(X,p(Y))),not(sub(X,Y))).
rewrite(not(el(X,join(Y,Z))),or(not(el(X,Y)),not(el(X,Z)))).
rewrite(not(or(X,Y)),and(not(X),not(Y))).
rewrite(not(and(X,Y)),or(not(X),not(Y))).
rewrite(not(not(X)),X).
rewrite(or(X,and(Y,Z)),and(or(X,Y),or(X,Z))).
rewrite(or(and(X,Y),Z),and(or(X,Z),or(Y,Z))).
or(X,Y) :-- prolog(tautology(or(X,Y))).
or(X,Y) :-- X.
or(X,Y) :-- Y.
and(X,Y) :-- X,Y.

% solved July 16 1986 8.4 cpu seconds 8 inferences size 5 (old values)

% ****************************************************************************
% Prolog source code for the tautology checker used in the commutativity
% of set intersection example;  needs to be directly loaded into C Prolog
    tautology(X) :- or_memb(not(Y),X),
    or_member(Y,X).

v    or_memb(not(X),Y) :- var(Y), !, Y=not(X).  % don't instantiate variables
                        % very much
    or_memb(X,X).
    or_memb(Z, or(X,Y)) :-
    (or_memb(Z,X) ; or_memb(Z,Y)).

    or_member(X,Y) :- var(Y), !, unify(X,Y).
    or_member(X,Y) :- unify(X,Y).
    or_member(Z, or(X,Y)) :-
    (or_member(Z,X) ; or_member(Z,Y)).
    
% ****************************************************************************
% the power set problem; p(X) is the power set of X
not(el(X,p(Y))) :- not(sub(X,Y)).
el(X,p(Y)) :- sub(X,Y).
not(el(X,join(Y,Z))) :- not(el(X,Y)).
el(X,join(Y,Z)) :- el(X,Y),el(X,Z).
el(X,join(Y,Z)) :- el(X,Z),el(X,Y).
replace(false,eq(p(join(a,b)),join(p(a),p(b)))).

rewrite(eq(X,Y),and(sub(X,Y),sub(Y,X))).
rewrite(sub(X,Y),or(not(el(g(X,Y),X)),el(g(X,Y),Y))).
rewrite(not(sub(X,Y)),notsub(X,Y)).
replace(notsub(X,Y),and(el(Z,X),not(el(Z,Y)))).
rewrite(el(X,p(Y)),sub(X,Y)).
rewrite(el(X,join(Y,Z)),and(el(X,Y),el(X,Z))).
rewrite(not(eq(X,Y)),or(not(sub(X,Y)),not(sub(Y,X)))).
rewrite(not(el(X,p(Y))),not(sub(X,Y))).
rewrite(not(el(X,join(Y,Z))),or(not(el(X,Y)),not(el(X,Z)))).
rewrite(not(or(X,Y)),and(not(X),not(Y))).
rewrite(not(and(X,Y)),or(not(X),not(Y))).
rewrite(not(not(X)),X).
rewrite(or(X,and(Y,Z)),and(or(X,Y),or(X,Z))).
rewrite(or(and(X,Y),Z),and(or(X,Z),or(Y,Z))).
or(X,Y) :-- prolog(tautology(or(X,Y))).
or(X,Y) :-- X.
or(X,Y) :-- Y.
and(X,Y) :-- X,Y.

% takes 84.4 seconds, 42 inferences, size 5, huge proof

% ****************************************************************************
% a simple example to illustrate splitting
false :- p,q.
p :- not(r).
p :- r.
q :- not(s).
q :- s.

% takes 1.6 seconds, 5 inferences

% ****************************************************************************
% schubert's steamroller, almost in the form he gave
w(w).
b(b).
s(s).
f(f).
c(c).
g(g).
a(X) :-- w(X).
a(X) :-- b(X).
a(X) :-- s(X).
a(X) :-- c(X).
a(X) :-- f(X).
p(X) :-- g(X).
m(X,Y) :-- c(X),b(Y).
m(X,Y) :-- b(X),f(Y).
m(X,Y) :-- s(X),b(Y).
m(X,Y) :-- f(X),w(Y).
false :- e(Y,X),f(X),w(Y).
false :- e(Y,X),w(Y),g(X).
false :- e(X,Y),b(X),s(Y).
e(X,Y) :-- b(X),c(Y).
p(h(X)) :-- c(X).
p(i(X)) :-- s(X).
e(X,h(X)) :-- c(X).
e(X,i(X)) :-- s(X).
g(j(X,Y)) :-- a(X),a(Y).
false :- a(Y),e(X,Y),a(X),e(Y,j(X,Y)).
e(X,Y) :- split2(X),p(Y).
split2(X) :-- m(Z,X),a(X),a(Z),split(Z),not(e(X,Z)).
split(Z) :-- e(Z,W),p(W).

% proof found in 557 seconds, 2944 inferences, size 18

% ****************************************************************************
% schubert's steamroller, modified to delay instantiation of
% arguments to j
b(b).
s(s).
f(f).
c(c).
g(g).
a(X) :-- w(X).
a(X) :-- b(X).
a(X) :-- s(X).
a(X) :-- c(X).
a(X) :-- f(X).
p(X) :-- g(X).
m(X,Y) :-- c(X),b(Y).
m(X,Y) :-- b(X),f(Y).
m(X,Y) :-- s(X),b(Y).
m(X,Y) :-- f(X),w(Y).
false :- f(X),w(Y),e(Y,X).
false :- w(Y),g(X),e(Y,X).
false :- b(X),s(Y),e(X,Y).
e(X,Y) :-- b(X),c(Y).
p(h(X)) :-- c(X).
p(i(X)) :-- s(X).
e(X,h(X)) :-- c(X).
e(X,i(X)) :-- s(X).
g(j(X,Y)) :-- not(na(X)),not(na(Y)).
false :- not(na(Y)),not(na(X)),e(X,Y),e(Y,j(X,Y)).
e(X,Y) :- split2(X),p(Y).
split2(X) :- m(Z,X),a(X),a(Z),split(Z),not(e(X,Z)).
split(Z) :- e(Z,W),p(W).
false :-- na(X),a(X).

% takes 193.6 seconds, 815 inferences, size 18
% with assert(f_only), takes 157 seconds, 1105 inferences, size 56

% ****************************************************************************
% the 'latin squares' problem

eq(X,X).
eq(Y,X) :- eq(X,Y).
eq(Z,U) :-- lat(X,Y,Z), lat(X,Y,U).
eq(Y,U) :-- lat(X,Y,Z), lat(X,U,Z).
eq(X,U) :-- lat(X,Y,Z), lat(U,Y,Z).
eq(Z,U) :-- gre(X,Y,Z), gre(X,Y,U).
eq(Y,U) :-- gre(X,Y,Z), gre(X,U,Z).
eq(X,U) :-- gre(X,Y,Z), gre(U,Y,Z).
lat(X,Y,a) :-- not(lat(X,Y,b)).
lat(X,a,Y) :-- not(lat(X,b,Y)).
lat(a,X,Y) :-- not(lat(b,X,Y)).
gre(X,Y,a) :-- not(gre(X,Y,b)).
gre(X,a,Y) :-- not(gre(X,b,Y)).
gre(a,X,Y) :-- not(gre(b,X,Y)).
eq(X,V) :--- lat(X,Y,Z), gre(X,Y,U), lat(V,W,Z),
not(eq(Y,W)), gre(V,W,U).
false :-- eq(a,b).

% takes 549 seconds, 1323 inferences, size 14
% used to take 2 hours on earlier versions of this prover

**********************************************************************
% schubert's steamroller, illustrating the Prolog interface
w(w).
b(b).
s(s).
f(f).
c(c).
g(g).
a(X) :-- prolog(X = w).
a(X) :-- prolog(X = b).
a(X) :-- prolog(X = s).
a(X) :-- prolog(X = c).
a(X) :-- prolog(X = f).
p(X) :-- g(X).
m(X,Y) :-- prolog((X = c , Y = b)).
m(X,Y) :-- prolog((X = b, Y = f)).
m(X,Y) :-- prolog((X = s, Y = b)).
m(X,Y) :-- prolog((X = f, Y = w)).
false :-- na(X),a(X).
false :- prolog(X = f),prolog(Y = w),e(Y,X).
false :- prolog(Y = w),g(X),e(Y,X).
false :- prolog(X = b),prolog(Y = s),e(X,Y).
e(X,Y) :-- prolog(X = b),prolog(Y = c).
p(h(X)) :-- prolog(X = c).
p(i(X)) :-- prolog(X = s).
e(X,h(X)) :-- prolog(X = c).
e(X,i(X)) :-- prolog(X = s).
g(j(X,Y)) :-- not(na(X)),not(na(Y)).
false :- not(na(Y)),not(na(X)),e(X,Y),e(Y,j(X,Y)).
e(X,Y) :- split2(X),p(Y).
split2(X) :- m(Z,X),a(X),a(Z),split(Z),not(e(X,Z)).
split(Z) :- e(Z,W),p(W).

% takes 139.5 seconds, 825 inferences, size 18 (old values)

*************************************************************************
% when the following is loaded into Prolog before solving the above
% example:

    nosave(a(X)).
    nosave(m(X,Y)).
    nosave(p(X)).
    nosave(g(X)).
    nosave(not(X)).

% then a proof can be obtained in 67 seconds, 551 inferences

*************************************************************************
% Monkey and Bananas Example, situation calculus representation

% The initial situation
% Situation predicates: at, on, has, has_nothing

at(ladder,l0,s0).
at(monkey,l1,s0).
at(bananas,l2,s0).
on(ceiling,bananas,s0).
on(floor,monkey,s0).
on(floor,ladder,s0).
has_nothing(monkey,s0).

% Actions : climb, go, drop, grab

at(monkey,L,go(L,S)) :- on(floor,monkey,S).

has(monkey,X,grab(X,S)) :- can(grab(X,S)).

can(grab(X,S)) :--  non_monkey(X),
            at(X,L,S),at(monkey,L,S),
            has_nothing(monkey,S),
            on(H,monkey,S),on(H,X,S).

has_nothing(monkey,drop(X,S)) :- has(monkey,X,S).

on(ceiling,monkey,climb(ladder,S)) :- can(climb(ladder,S)).

can(climb(ladder,S)) :-- at(ladder,L,S),
            at(monkey,L,S),
            on(floor,ladder,S),
            on(floor,monkey,S),
            has_nothing(monkey,S).

at(X,L,S) :- has(monkey,X,S),at(monkey,L,S).

on(H,X,S) :- has(monkey,X,S),on(H,monkey,S).

% Frame axioms

has_nothing(monkey,climb(X,S)) :- can(climb(X,S)),
            has_nothing(monkey,S).
has_nothing(monkey,go(L,S)) :- has_nothing(monkey,S), on(floor,monkey,S).

has(monkey,X,go(L,S)) :- has(monkey,X,S), on(floor,monkey,S).

at(Y,L,grab(X,S)) :- can(grab(X,S)),at(Y,L,S).
at(Y,L,climb(X,S)) :- can(climb(X,S)),at(Y,L,S).
at(X,L,drop(Y,S)) :- has(monkey,Y,S),at(X,L,S).
at(X,L1,go(L2,S)) :- at(X,L1,S),non_monkey(X),has_nothing(monkey,S),
        on(floor,monkey,S).
at(X,L1,go(L2,S)) :- at(X,L1,S),non_monkey(X),has(monkey,Y,S),differ(X,Y),
        on(floor,monkey,S).

on(H,X,go(L,S)) :- on(H,X,S), on(floor,monkey,S).
on(H,X,climb(Y,S)) :- can(climb(Y,S)),
    on(H,X,S),non_monkey(X),has_nothing(monkey,S).
        % for completeness also need one if monkey has something
on(floor,Y,drop(Y,S)) :- has(monkey,Y,S).
on(floor,X,drop(Y,S)) :- has(monkey,Y,S),on(floor,X,S).
on(ceiling,X,drop(Y,S)) :- has(monkey,Y,S),differ(X,Y),on(ceiling,X,S).
on(H,X,grab(Y,S)) :- can(grab(Y,S)),on(H,X,S).

rewrite(go(Z,go(W,S)),go(Z,S)).
rewrite(grab(X,drop(X,S)),S).
rewrite(has(monkey,monkey,S),fail).

% Monkey predicate

non_monkey(ladder).
non_monkey(bananas).
differ(ladder,bananas).
differ(bananas,ladder).

% The goal

false :- has(monkey,bananas,S).

% proved in 402.5 seconds 1733 inferences size 18
% with no weighting of sizes of solutions,
% proved in 258 seconds size 14 1262 inferences (old value)

***************************************************************************

% Monkey and Bananas Problem
% State space representation
% Format of a situation is (monkey(loc,height,has),ladder(loc,height),
%       bananas(loc,height))
%
% can(S) means situation S is attainable
%

% Initial situation
can(monkey(l0,floor,[]),ladder(l1,floor),bananas(l2,ceiling)).

% Actions
can(monkey(L1,ceiling,[]),ladder(L1,floor),Y) :-
    can(monkey(L1,floor,[]),ladder(L1,floor),Y).    % climb up

can(monkey(L1,floor,[]),ladder(L1,floor),Y) :-
    can(monkey(L1,ceiling,[]),ladder(L1,floor),Y).  % climb down

can(monkey(L2,floor,ladder),ladder(L2,floor),Z) :-
    can(monkey(L1,floor,ladder),ladder(L1,floor),Z).
    % go holding ladder

can(monkey(L2,floor,bananas),Y,bananas(L2,floor)) :-
    can(monkey(L1,floor,bananas),Y,bananas(L1,floor)).
    % go holding bananas

can(monkey(L2,floor,[]),Y,Z) :-
    can(monkey(L1,floor,[]),Y,Z).
    % go holding nothing

can(monkey(L,H,[]),ladder(L,floor),Bananas) :-
    can(monkey(L,H,ladder),ladder(L,Z),Bananas).    % drop ladder

can(monkey(L,H,[]),Ladder,bananas(L,floor)) :-
    can(monkey(L,H,bananas),Ladder,bananas(L,Z)).   % drop bananas

can(monkey(L,H,bananas),Z,bananas(L,H)) :-
    can(monkey(L,H,[]),Z,bananas(L,H)). % grab bananas

can(monkey(L,H,ladder),ladder(L,H),Z) :-
    can(monkey(L,H,[]),ladder(L,H),Z).  % grab ladder

false :- can(monkey(_,_,bananas),_,_).      % the goal

% Takes 24.1 cpu seconds, 30 inferences.
%
% With assert(nosave), the proof was found in 9.4 seconds, size
% 7, 29 inferences, November 11 1986
%
% With assert(f_only), proof in 10.3 seconds, 58 inferences, size 11
% (increased size due to counting sizes of solutions in priorities)
%
% With assert(b_only), used to take 16.7 seconds, 8 inferences, gave the
% following proof:
%false:-
% (can(monkey(l2,ceiling,bananas),ladder(l2,floor),bananas(l2,ceiling)):-
% (can(monkey(l2,ceiling,[]),ladder(l2,floor),bananas(l2,ceiling)):-
% (can(monkey(l2,floor,[]),ladder(l2,floor),bananas(l2,ceiling)):-
% (can(monkey(l2,floor,ladder),ladder(l2,floor),bananas(l2,ceiling)):-
% (can(monkey(l1,floor,ladder),ladder(l1,floor),bananas(l2,ceiling)):-
% (can(monkey(l1,floor,[]),ladder(l1,floor),bananas(l2,ceiling)):-
% (can(monkey(l0,floor,[]),ladder(l1,floor),bananas(l2,ceiling)))))))))

***********************************************************************
% In an associative system with left and right identities, if the
% square of every element is the identity, the operation is
% commutative.  This example illustrates the use of equality axioms
% and interaction.  This is important for many practical
% applications.  Our treatment of equality is not exceptional but
% is often enough.

X * e = X.
e * X = X.
X * X = e.

% Spread out form of associativity axiom

X * V = W1 :- X * Y = U, Y * Z = V, U * Z = W1.
U * Z = W1 :- X * Y = U, Y * Z = V, X * V = W1.

% Simplifications

rewrite(X * e, X).
rewrite(e * X, X).
rewrite(X * X, e).
replace(X * Y = X * Z, Y = Z).
replace(Y * X = Z * X, Y = Z).
rewrite((X * Y) * Z, X * (Y * Z)).

% An equality axiom
X = X.

% The theorem
false :- a * b = X, b * a = X.

% Generated X * Y = Y * X after 5 minutes or so with user
% interaction Nov. 18 1986, did not make use of the theorem, did
% not stop execution until interrupted.

***********************************************************************

% A problem from the newsletter of the Association of Automated Reasoning

p(X, Y) :- not(q(X, Y)).
p(X, Z) :- p(X, Y), p(Y, Z).
q(Y, X) :- q(X, Y).
q(X, Z) :- q(X, Y), q(Y, Z).
false :- p(a, b).
false :- q(c, d).

% proved in 216 seconds 1243 inferences size 18 on the first try
% much worse with assert(nosave) or assert(f_only)
%
% now gets the proof in 46 seconds, 236 inferences, size 14

***********************************************************************

% The intermediate value theorem:  If f(a) =< 0 and f(b) >= 0 and
% a =< b and f is continuous then there exists a value X such that
% f(X) = 0.  Here p(X,Y) means X =< Y.  Also, l is the least
% upper bound of {X : X =< b, f(X)  =< 0}.

p(a, b).
p(f(a), 0).
p(0, f(b)).

p(l, X) :-. ((p(g(X), X) :- p(g(X), b)) :- p(f(g(X)), 0)).
% p(l, X) :- not(p(g(X), b)).
% p(l, X) :- not(p(f(g(X)), 0)).
% p(l, X) :- p(g(X), X).

p(g(X), b) :- not(p(l, X)).
p(f(g(X)), 0) :- not(p(l, X)).
not(p(g(X), X)) :- not(p(l, X)).

p(X, l) :- and([p(X, b), p(f(X), 0)]).

not(p(f(X), 0)) :- and([p(X, b), not(p(X, l))]).

% not(p(X, b)) :- not(p(X, l)), p(f(X), 0).

not(p(k(k(X)), X)) :- smallf(X).
not(p(X, h(h(X)))) :- largef(X).

% p(0, f(X)) :- and([p(k(k(X)), X), inrange(X)]).
% p(f(X), 0) :- and([p(X, h(h(X))), inrange(X)]).

p(0, f(X)) :- and([inrange(X), p(k(k(X)), X)]).
p(f(X), 0) :- and([inrange(X), p(X, h(h(X)))]).

p(f(X), 0) :- and([inrange(X), hrange(X,Y), p(f(Y), 0)]).

p(0, f(X)) :- and([inrange(X), krange(X,Y), p(0, f(Y))]).

not(p(f(Y), 0)) :- and([largef(X), hrange(X,Y)]).

not(p(0, f(Y))) :- and([smallf(X), krange(X,Y)]).

inrange(X) :-- and([p(a,X),p(X,b)]).

% smallf(X) :- and([inrange(X),not(p(0, f(X)))]).
% largef(X) :- and([inrange(X),not(p(f(X), 0))]).

% hrange(X,Y) :- and([p(Y, X), not(p(Y, h(h(X))))]).
% krange(X,Y) :- and([p(X, Y), not(p(k(k(X)), Y))]).

smallf(X) :-- and([not(p(0, f(X))),inrange(X)]).
largef(X) :-- and([not(p(f(X), 0)),inrange(X)]).

hrange(X,Y) :-- and([not(p(Y, h(h(X)))),p(Y, X)]).
krange(X,Y) :-- and([not(p(k(k(X)), Y)),p(X, Y)]).

p(Y, h(h(X))) :- and([largef(X), p(Y, X), p(f(Y), 0)]).

p(k(k(X)), Y) :- and([smallf(X), p(X, Y), p(0, f(Y))]).

% p(Y, h(h(X))) :- p(f(Y), 0), not(p(f(X), 0)), p(Y, X), p(a, X), p(X, b).
% p(k(k(X)), Y) :- p(0, f(Y)), not(p(0, f(X))), p(X, Y), p(a, X), p(X, b).

% instructions to the prover to derive facts of a certain form, then fail

small(X) :-- (not(p(f(Y), 0)):- not(p(f(X), 0))), fail.

small(X) :-- (not(p(f(Y), 0)):- not(p(f(X), 0))),
        (p(f(Y), 0) :- not(p(f(X), 0))).     % the easy part

big(X) :-- (not(p(0, f(Y))):- not(p(0, f(X)))), fail.

big(X) :-- (not(p(0, f(Y))):- not(p(0, f(X)))),
        (p(0, f(Y)) :- not(p(0, f(X)))).

false :-- small(X), fail.

% the theorem

false :-- small(X), big(X).

and([X|Y]) :-- X, and(Y).
and([]).

p(X, X).

% transposes of transitivity axiom

p(X, Z) :- prolog(var(X)), p(Y, Z), p(X, Y).
p(X, Z) :- prolog(nonvar(X)), p(X, Y), p(Y, Z).

% not(p(X,Y)) :- p(Y, Z), not(p(X, Z)).
% not(p(Y, Z)) :- p(X, Y), not(p(X, Z)).

p(X, Y) :- not(p(Y, X)).

% denseness of the reals
% use contrapositives for these axioms

not(p(X, q(q(Y, X)))) :- not(p(X, Y)).
not(p(q(q(X, Y)), X)) :- not(p(Y, X)).

% p(X, Y) :- p(X, q(q(Y, X))).
% p(Y, X) :- p(q(q(X, Y)), X).

% Proved Dec. 30 1986 size 5 312 seconds 1128 inferences with flags as
% follows:

%:- assert(b_only),
% proof_trace,
% assert(nosplits), assert(nosave(and(_))),
% assert(nosize),
% assert(maxsize),
% try(imv).

% and big_subgoal modified as follows:

%    big_subgoal((L :- B), SIZE) :-
%   \+ top_connective(L),
%   ( (length(B,Len), Len > 3) ;
%    (flatsize2(L,FS), FS > 11) ;
%    (member(X,B), flatsize(X,FX),
%     FX > 11)), !.

% newly_solved(0,id(1),(false:-[]))

% proof found
% false:-lemma((small(l):-[])),(big(l):-
%  lemma((not p(0,f(q(q(l,k(k(l)))))):-[not p(0,f(l))])),
%  (p(0,f(q(q(l,k(k(l)))))):-
%  (not p(f(q(q(l,k(k(l))))),0):-
%  (and([p(q(q(l,k(k(l)))),b),not p(q(q(l,k(k(l)))),l)]):-
%  lemma((p(q(q(l,k(k(l)))),b):-[not p(0,f(l))])),
%  (and([not p(q(q(l,k(k(l)))),l)]):-
%  lemma((not p(q(q(l,k(k(l)))),l):-[not p(0,f(l))])),and([]))))))
% size of proof 5

% 311.633 cpu seconds used
% 1128 inferences done

% proved later in 285 seconds, size 5, 1128 inferences after a
% minor bug was fixed

% The current version of the prover requires proof_size_multiplier(0)
% and solution_size_multiplier(0.125) to get this one and even then
% takes over 500 seconds for some reason

%   New Examples for C Prolog Theorem Prover
%   Note -- some times and number of inferences may differ
%   Each example must be in a separate file

%           Dave Plaisted
%           University of North Carolina at Chapel Hill
%           April 29, 1987

******************************************************************

% Problems in implicational logic from Lukasziewicz (?)
% sent by Frank Pfenning of CMU

thm(X) :- thm(imp(Y,X)), thm(Y).

thm(imp(imp(imp(P,Q),R),imp(imp(R,P),imp(S,P)))).

% false:-thm(imp(p,p)).
% March 4 1987 with assert(b_only) and assert(nosize), proved,
% size of proof 11
% 137.016 cpu seconds used
% 1812 inferences done
% March 12 with assert(nosize), solution_size_mult(0.0),
% proof_size_mult(1.0) obtained proof of size 18, 99.5 seconds,
% 582 inferences

% false:-thm(imp(p,imp(q,p))).
% Proved March 4 1987 with assert(nosize),
% size of proof 11
% 64.3833 cpu seconds used
% 481 inferences done
% March 12 with assert(nosize), solution_size_mult(0.0),
% proof_size_mult(1.0) obtained proof of size 18, 58 seconds,
% 357 inferences

thm(imp(P,P)).
false :- thm(imp(imp(imp(p,q),p),p)).
% could not be proven without thm(imp(P,P)) as a lemma (by Frank P.)
% proved March 10 with lemma thm(imp(P,P)) with new prover, using
% proof size multiplier 1 and solution size multiplier 0 and assert(nosize)
% 265.15 seconds, 1660 inferences, size 18
% Proved April 6 218 seconds 1331 inferences size 18

% Proved June 2 1987 (without P implies P lemma)
% size of proof 24
% 2037.23 cpu seconds used
% 5519 inferences done
% flags as above

******************************************************************
% Problems in ternary Boolean algebra tried by myself and Peter Schorn
% some previously done by various provers including AURA

% Use of equality axioms and interaction

f(Y,X,X) = X.
f(X,Y,g(Y)) = X.
f(f(V,W,X),Y,f(V,W,Z)) = f(V,W,f(X,Y,Z)).

U = V :-. X = U, prolog((X \== U)), X = V, prolog((X \== V)),
    prolog((U \== V)).
f(X,V,W) = U :-. f(X,Y,W) = U, prolog(nonvar(Y)), Y = V, prolog((Y \== V)),
    prolog((f(X,V,W) \== U)).
f(V,Y,W) = U :-. f(X,Y,W) = U, prolog(nonvar(X)), X = V, prolog((X \== V)),
    prolog((f(V,Y,W) \== U)).
f(X,W,V) = U :-. f(X,W,Y) = U, prolog(nonvar(X)), Y = V, prolog((Y \== V)),
    prolog((f(X,W,V) \== U)).

rewrite(f(Y,X,X), X).
rewrite(f(X,Y,g(Y)), X).
rewrite(f(f(V,W,X),Y,f(V,W,Z)), f(V,W,f(X,Y,Z))).

X = X.
X = Y :- Y = X.

% Part of Peter Schorn 282 project on ternary Boolean algebra


% Proved g(g(X)) = X in about a minute with user interaction
% April 28 1987

% Also proved a number of other equations fairly quickly
% including f(X,X,Y) = X, f(g(Y),Y,X) = X, f(X,Y,X) = X,
% f(X,Y,g(X)) = Y, f(X,Y,Z) = f(X,Z,Y), and others.
% Some were done using above representation, some with pulling
% out subterm idea and an automatic ordering function.
% Some were proven with the negation of the theorem, most without
% it (by pure forward chaining).

% The point of the three complicated clauses above is to simulate
% the generation of critical pairs in the Knuth-Bendix completion
% algorithm.

% Some of these proofs were done without the X = Y :- Y = X axiom.

******************************************************************

% Some simple problems from Smullyan's book To Mock a Mockingbird
% tried by myself and Michael Smith

W1 = W2 :- n(Y,Z) = V, n(X,V) = W1, comp(X,Y) = U, n(U,Z) = W2.
W1 = W2 :- n(mock,X) = W1, n(X,X) = W2.
X = X.

X = Y :- Y = X.


%W1 = W2 :- n(l, X) = U, n(U, Y) = W1,
%       n(Y,Y) = V, n(X,V) = W2.
%
%false :- W1 = X, n(X,X) = W1.
%
%proof found April 29 1987
%false:-(n(mock,comp(mock,mock))=n(mock,n(mock,comp(mock,mock))):-n(mock,comp(mock,mock))=n(mock,comp(mock,mock)),lemma((n(comp(mock,mock),comp(mock,mock))=n(mock,n(mock,comp(mock,mock))):-[]))),n(mock,comp(mock,mock))=n(mock,comp(mock,mock))
%size of proof 7
%31.7 cpu seconds used
%121 inferences done


% Another proof:
% Proved 6 seconds 26 inferences
% false :- W1 = X, n(b,X) = W1.
% Done for Michael Smith COMP 282 project April 1987

******************************************************************

% Three group axioms from Knuth and Bendix' paper

e * X = X.
g(X) * X = e.
(X * Y) * Z = X * (Y * Z).

eq(U, V) :-. X = U, prolog((X \== U)), X = V, prolog((X \== V)),
    prolog((U \== V)).
eq(X * V, U) :-. X * Y = U, prolog(nonvar(Y)), Y = V, prolog((Y \== V)),
    prolog((X * V \== U)).
eq(V * Y, U) :-. X * Y = U, prolog(nonvar(X)), X = V, prolog((X \== V)),
    prolog((V * Y \== U)).

% X = Y :- Y = X.

X = Y :- eq(X,Y).
% Y = X :- eq(X,Y).
eq(X,X).

rewrite(e * X, X).
rewrite(g(X) * X, e).
replace(X * Y = X * Z, Y = Z).
replace(Y * X = Z * X, Y = Z).
rewrite((X * Y) * Z, X * (Y * Z)).
X = X.

% March 18 1987 completed this system to yield the usual 10 rule
% canonical term rewriting system, in about 5 minutes of
% cpu time with psm(0) and ssm(0.25) and assert(f_only).

% March 19 with back simplification and flags as above about 5 1/2 minutes
% took about 5 minutes with psm(0), ssm(0.125)

% about the same time with psm(0), ssm(0) and better cost for prolog
% subgoals

% about 3 minutes with psm(0), ssm(0.125), more efficient back
% simplification March 20 1987

% March 31 psm(.4) ssm(.1) completed in 2 minutes 30 seconds for some
% reason

% March 31 psm(.4) ssm(.1) completed in less than 2 minutes with more
% hand delaying of large g terms

******************************************************************

%
% Salt and Mustard problem, from J.A.R.
%
false :- oneof(X), both(X).
false :- oneof(X), neither(X).
false :- both(X), neither(X).
false :- neither(X), salt(X).
false :- neither(X), mustard(X).

% contrapositives added to make proof faster

not oneof(X) :- both(X).
not oneof(X) :- neither(X).
not both(X) :- neither(X).
not neither(X) :- salt(X).
not neither(X) :- mustard(X).

% the answer
mustard(mill) :- neither(dix), neither(cole), both(barry), 
         mustard(lang), salt(mill), not(salt(lang)).

both(X) :- salt(X), mustard(X).
both(dix) :- mustard(cole), not(both(lang)).
both(barry) :- salt(mill), not(both(lang)).
both(X) :-  not(neither(X)), not(oneof(X)).

salt(X) :- both(X).
salt(dix) :- neither(barry).
salt(dix) :- both(cole).
salt(cole) :- oneof(barry).
salt(cole) :- neither(mill).
salt(barry) :- oneof(cole).
salt(barry) :- oneof(cole).
salt(barry) :- oneof(lang).
salt(lang) :- oneof(barry).
salt(lang) :- oneof(dix).
salt(mill) :- both(barry).
salt(X) :- oneof(X), mustard(X).
salt(X) :- oneof(X), not(mustard(X)).

neither(X) :- not(salt(X)), not(mustard(X)).
neither(dix) :- mustard(barry), not(both(mill)).
neither(barry) :- salt(dix), not(both(cole)).
neither(lang) :- mustard(dix), not(neither(mill)).
neither(cole) :- mustard(lang), not(neither(mill)).

oneof(barry) :- salt(cole), not(neither(mill)).
oneof(cole) :- salt(barry), not(oneof(lang)).
oneof(barry) :- salt(lang), not(oneof(dix)).
oneof(cole) :- mustard(mill), not(oneof(dix)).

mustard(X) :- both(X).
mustard(barry) :- neither(dix).
mustard(barry) :- both(mill).
mustard(cole) :- both(dix).
mustard(cole) :- both(lang).
mustard(dix) :- neither(lang).
mustard(dix) :- neither(mill).
mustard(lang) :- neither(cole).
mustard(lang) :- neither(mill).
mustard(mill) :- both(lang).
mustard(mill) :- oneof(cole).
mustard(mill) :- oneof(dix).

%size of proof 18
%7724.76 cpu seconds used
%3367 inferences done
% April 2, 1987 on SUN3 without contrapositives

%size of proof 18
%1926.9 cpu seconds used
%1613 inferences done April 7 1987 on SUN 3 with contrapositives

******************************************************************

% Latin squares problem, new forward chaining approach

false :- not eq(X,X).
eq(X,X).
false :- eq(X,Y), not eq(Y,X).
false :- lat(X,Y,Z), lat(X,Y,U), not eq(Z,U).
false :- lat(X,Y,Z), lat(X,U,Z), not eq(Y,U).
false :- lat(X,Y,Z), lat(U,Y,Z), not eq(X,U).
false :- gre(X,Y,Z), gre(X,Y,U), not eq(Z,U).
false :- gre(X,Y,Z), gre(X,U,Z), not eq(Y,U).
false :- gre(X,Y,Z), gre(U,Y,Z), not eq(X,U).
false :- not lat(X,Y,a), not(lat(X,Y,b)).
false :- not lat(X,a,Y), not(lat(X,b,Y)).
false :- not lat(a,X,Y), not(lat(b,X,Y)).
false :- not gre(X,Y,a), not(gre(X,Y,b)).
false :- not gre(X,a,Y), not(gre(X,b,Y)).
false :- not gre(a,X,Y), not(gre(b,X,Y)).
false :- lat(X,Y,Z), gre(X,Y,U), lat(V,W,Z), gre(V,W,U),
not eq(Y,W), not eq(X,V).
false :- eq(a,b).
false :- eq(b,a).

% Proved 210 seconds 504 inferences size 18 April 21 1987 with
% default multipliers

% Proved 524 seconds 1450 inferences size 18 April 21 1987 with
% psm(0) and ssm(0.125) and lots of time spent after deriving
% false :- eq(X,Y).

% As above but with no failure on repeated solutions, proved same
% number of inferences in 436 seconds.

% As above but with default multipliers and no case analysis,
% 183 seconds, 512 inferences, size 18, long time after derived
% false :- not eq(X,Y).  Had very little core allocated.  April 24 1987.

not eq(a,b).
not eq(b,a).

% with the above two clauses added, all else as before, proved
% 154 seconds 581 inferences size 18 small core April 24 1987

% as above but with negative unit derivation allowed, proved
% 179 seconds 564 inferences size 18 April 27, 1987
