
% Updated from the obsolete LaTeX 2.09 \documentstyle to LaTeX2e syntax;
% [12pt] remains a valid class option under \documentclass.
\documentclass[12pt]{article}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% \newfont{\mmk}{cmr6} % For gremlin size 8

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% COMMANDS FOR COMMENTING, REMINDERS, EATING TEXT etc.
%    \eat{..}: 
%	Useful for temporarily removing text from the output.
%    \comment{..}:
%    \inlinecomment{..}:
%    \reminder{..}:  
%	This is a very useful command.  It adds the argument as inline text
%	delimited by [[ .. ]].  It creates an arrow in the right margin
%	which makes it very easy to locate reminders.  Can be redefined to
%	eat up reminders when creating a version of the paper for
%	distribution.
%    \fullversion{..}:
%%%%%%%%%%

% Swallow the argument entirely: use to temporarily remove text from the output.
\newcommand{\eat}[1]{}
% \newenvironment{comment}{\noindent \begin{footnotesize} \{{\bf Comment:}}{\}\end{footnotesize}}

% Typeset the argument inline as a footnotesize "Comment:" paragraph.
% (Redefined to discard its argument near the end of this file.)
\newcommand{\comment}[1]{\noindent \begin{footnotesize} {\bf Comment:} #1 \end{footnotesize}\\}

% Typeset the argument as a quotation delimited by [[[ ... ]]].
\newcommand{\inlinecomment}[1]{\begin{quotation} [[[ #1 ]]] \end{quotation}}
% Inline reminder delimited by [[[ ... ]]], with a "<==" arrow in the margin.
\newcommand{\reminder}[1]{ [[[ \marginpar{\mbox{$<==$}} #1 ]]] }
% After \eatreminders, subsequent \reminder calls produce no output.
\newcommand{\eatreminders}[0]{\renewcommand{\reminder}[1]{}}
% Text intended only for the full version of the paper; discarded by default.
\newcommand{\fullversion}[1]{ }


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   The following commands can be changed to alter several 
%	commands/environments defined in this file 
%
%  \noBox:
%     	The \condBox command is used in several environments below, such as
%     	propositions to generate a box at the end of the environment.  
%     	If you don't want a box to be generated, insert the command
%     	\noBox within the desired environment.  You can even add it to the
%     	beginning of the document for it to have a global effect.
%
%%%%%%%%%%%%%%%%%%%%%%%

% End-of-environment box symbol used by proposition/definition/example etc.
% NOTE(review): \raggedleft inside a group with no following \par has no
% effect, so the box is typeset at the current position rather than flush
% right -- confirm whether right-flushing was intended.
\newcommand{\condBox}{{\raggedleft \mbox{$ \Box $}}}
% From the point of use onward, suppress the end-of-environment box.
\newcommand{\noBox}{\renewcommand{\condBox}{}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SPACE CRUNCHING COMMANDS
%  \crunchspace: 
%	use small font in several environments (to find out in which
%	look for uses of \crunchbegin and \crunchend
%  \spcrunch:
%	Eats vertical space.
%  \sections:
%  \subsections:
%  \subsubsections:
%  \subsubsubsections:
%	The above four commands eat vertical space around section names.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Eat a small amount of vertical space.
\newcommand{\spcrunch}{\vspace{-0.15in}}


\newcommand{\crunchbegin}{} % These two commands are redefined to \begin{small}
\newcommand{\crunchend}{}   % and \end{small} if space crunching is required

% Switch the crunch hooks on: environments bracketed by \crunchbegin /
% \crunchend are subsequently typeset in \small.
\newcommand{\crunchspace}{
	\renewcommand{\crunchbegin}{\begin{small}}
	\renewcommand{\crunchend}{\end{small}}
    }

% Space-saving variants of the sectioning commands: identical to the
% standard ones but with vertical space eaten before and after the heading.
\newcommand{\sections}[1]{\vspace{-0.2in} \section{#1} \vspace{-0.15in}}
\newcommand{\subsections}[1]{\vspace{-0.1in} \subsection{#1} \vspace{-0.1in}}
\newcommand{\subsubsections}[1]
	{\vspace{-0.1in} \subsubsection{#1} \vspace{-0.1in}}
% BUG FIX: the article class defines no \subsubsubsection, so the original
% definition raised "Undefined control sequence" when used.  \paragraph is
% the next sectioning level below \subsubsection.
\newcommand{\subsubsubsections}[1]
	{\vspace{-0.2in} \paragraph{#1} \vspace{-0.1in}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MISC. COMMANDS.
%
%   \tuple{..}:
%	Creates an overline to indicate that this is a vector/tuple.
%
%   \lbok:
%	Tells latex that it's ok to break a line here.
%%%%%%%%%%%%%%%

% Overline the argument to mark it as a vector/tuple.
\newcommand{\tuple}[1]{\overline{#1}}
% Permissible (zero-penalty) line-break point.
\newcommand{\lbok}{\linebreak[0]}

% Relational semijoin symbol, built from a vertical rule and "><".
\newcommand{\semijoin}{\mathrel{\raise1pt\hbox{\vrule
	height5pt depth0pt\hskip-1.5pt$>$\hskip -2.5pt$<$}}}
% Functional dependency arrow.
\newcommand{\fd}{\rightarrow}
% Multivalued dependency arrow.
\newcommand{\mvd}{\rightarrow\!\rightarrow}

\newcommand{\intersection}{\cap}
\newcommand{\union}{\cup}
% \newcommand{\ifff}{\mbox{$ \leftarrow $}} 
% Rule "if" symbol (Prolog-style ":-"); re-asserted again after the preamble.
\newcommand{\ifff}{\mbox{$ :- $}} 
   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Environments for Definitions, Lemmas, Theorems, Propositions etc:
%  The environments must be used in one of the following ways:
%  	Environments that do not start with "named" do not take any 
%		parameters and are used as follows.
%  	\begin{definition} 
%         	... fill in definition ;  
%  	\end{definition}
%  	Environments that take parameters are used as follows:
%  	\begin{nameddefinition}{name of definition} 
%         	... fill in definition 
%  	\end{nameddefinition}
%  The numbering of the environments is done independently for each
%  environment type.  This can be changed if desired.
%
%  List of Environments:
%  --------------------
%	theorem
%	namedtheorem
%	lemma
%	namedlemma
% 	proposition	(Creates a box at the end)
%	namedproposition	(Creates a box at the end)
%	definition	(Creates a box at the end)
%	nameddefinition	(Creates a box at the end)
%	example		(Creates a box at the end)
%	namedexample	(Creates a box at the end)
%	assumption
%	claim
%	conjecture	(Creates a box at the end)
%	property	(Creates a box at the end)
%	namedproperty	(Creates a box at the end)
%
% WARNING:  When defining environments, DO NOT leave gaps between the 
%	    argument definitions.  Latex screws up if this is done.
%	Eg.  \newenvironment{haha}{..}{..} is ok,
%	BUT  \newenvironment{haha} {..} {..} is NOT OK.

%%%%%%%%%%%%%%%%%%%%%%%%%%
%  The following set the numbering style for several environments defined
%	above.
%%%%
% Theorem-like environments numbered per section, each on its own counter.
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemm}{Lemma}[section]
\newtheorem{prop}{Proposition}[section]
\newtheorem{coroll}{Corollary}[section]
\newtheorem{conj}{Conjecture}[section]
\newtheorem{claims}{Claim}[section]

\newtheorem{ex}{Example}[section]
\newtheorem{alg}{Algorithm}[section]
\newtheorem{defin}{Definition}[section]
\newtheorem{assump}{Assumption}[section]
\newtheorem{propty}{Property}[section]

%% Environments with merged numbering
% NOTE(review): the merged environments share the *theorem* (resp. *defin*)
% counter rather than mtheorem/mdefin, so the mtheorem and mdefin counters
% declared below are never used -- confirm this is intended.
\newtheorem{mtheorem}{Theorem}[section]
\newtheorem{mlemm}[theorem]{Lemma}
\newtheorem{mprop}[theorem]{Proposition}
\newtheorem{mcoroll}[theorem]{Corollary}
\newtheorem{mconj}[theorem]{Conjecture}
\newtheorem{mclaims}[theorem]{Claim}

\newtheorem{mdefin}{Definition}[section]
\newtheorem{massump}[defin]{Assumption}
\newtheorem{mpropty}[defin]{Property}
\newtheorem{mex}{Example}[section]
\newtheorem{malg}{Algorithm}[section]

% Indirection: these name the environment actually used by the
% proposition/lemma/corollary/conjecture/claim wrappers defined below;
% \separatenumbering and \mergednumbering switch between the two sets.
\newcommand{\propc}{prop}
\newcommand{\lemmc}{lemm}
\newcommand{\corollc}{coroll}
\newcommand{\conjc}{conj}
\newcommand{\claimsc}{claims}

% Use independently numbered environments (the initial setting).
\newcommand{\separatenumbering}{
    \renewcommand{\propc}{prop}
    \renewcommand{\lemmc}{lemm}
    \renewcommand{\corollc}{coroll}
    \renewcommand{\conjc}{conj}
    \renewcommand{\claimsc}{claims}
}

% Number propositions, lemmas, corollaries, conjectures and claims on the
% shared theorem counter.
\newcommand{\mergednumbering}{
    \renewcommand{\propc}{mprop}
    \renewcommand{\lemmc}{mlemm}
    \renewcommand{\corollc}{mcoroll}
    \renewcommand{\conjc}{mconj}
    \renewcommand{\claimsc}{mclaims}
}

% Theorem-like wrapper environments.  The "named" variants take the result's
% name as their single argument; the ones listed as boxed in the comment
% block above end with \condBox (suppressible via \noBox).  All honor the
% \crunchbegin/\crunchend space-crunching hooks.
\newenvironment{namedtheorem}[1]{\begin{theorem}{\bf (#1)\hspace{1mm}:} \begin{rm} \crunchbegin }{ \crunchend \end{rm} \end{theorem}}

% proposition/lemma/corollary/conjecture/claim go through the \propc etc.
% indirection, so their numbering follows \separatenumbering/\mergednumbering.
\newenvironment{proposition}{\begin{\propc} \crunchbegin }{\condBox \crunchend \end{\propc}}

\newenvironment{namedproposition}[1]{\begin{proposition}{\bf (#1)\hspace{1mm}:} \begin{rm} \crunchbegin }{ \crunchend \end{rm} \end{proposition}}

\newenvironment{lemma}{\begin{\lemmc} \crunchbegin }{\crunchend \end{\lemmc}}

\newenvironment{namedlemma}[1]{\begin{lemma}{\bf (#1)\hspace{1mm}:} \begin{rm} \crunchbegin }{ \crunchend \end{rm} \end{lemma}}

\newenvironment{corollary}{\begin{\corollc} \crunchbegin }{\crunchend \end{\corollc}}

\newenvironment{conjecture}{\begin{\conjc} \crunchbegin }{\condBox \crunchend \end{\conjc}}

\newenvironment{claim}{\begin{\claimsc} \crunchbegin }{\crunchend \end{\claimsc}}

% Proofs are plain paragraphs ended by a Box (always; independent of \noBox).
\newenvironment{namedproof}[1]{\noindent {\bf Proof:}~(#1)\\ \nopagebreak \crunchbegin}{\crunchend {\raggedleft$\Box$}}

\newenvironment{proof}{\noindent {\bf Proof}: \crunchbegin \nopagebreak}{\crunchend {\raggedleft$\Box$}}

\newenvironment{namedexample}[1]{\begin{ex}{\bf (#1)}\\ \crunchbegin \begin{rm}}{\condBox \end{rm} \crunchend \end{ex}}

\newenvironment{example}{\begin{ex} \nopagebreak \crunchbegin \begin{rm}}{\condBox \end{rm} \crunchend \end{ex}}

\newenvironment{nameddefinition}[1]{\begin{defin}{\bf #1\hspace{1mm}:} \begin{rm} \crunchbegin }{ \condBox \crunchend \end{rm} \end{defin}}

\newenvironment{definition}{\begin{defin} \begin{rm} \crunchbegin }{\condBox \crunchend \end{rm} \end{defin}}

\newenvironment{assumption}{\begin{assump} \crunchbegin }{\condBox \crunchend \end{assump}}

\newenvironment{namedproperty}[1]{\begin{propty}{\bf #1\hspace{1mm}:} \begin{rm} \crunchbegin }{ \condBox \crunchend \end{rm} \end{propty}}

\newenvironment{property}{\begin{propty} \begin{rm} \crunchbegin }{\condBox \crunchend \end{rm} \end{propty}}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THE ALGORITHM command
%
%    \algorithm:
%    	The command \algorithm, takes one argument, to format
%	algorithms.  It sets font to \sf, and sets up a tabbing environment,
%	and creates lines above and below the algorithm to set off the 
%	algorithm from other text.  
%	
%
%    \ordinalg:
%    	If you don't want the separating lines, but want other features of the
%	\algorithm command, use the command \ordinalg
%
%   The default tabbing for algorithms is four spaces.	If you use 
%	\setprocdefaults this is changed to two spaces.
%
%    \pseudocode:
%	This commands creates the separating lines just like \algorithm,
%	but does not set up tabbing mode.
%
%  \setalgseplinelen{<length>}:
%   	The default length of the separating line is 6 inches.  If you use
%	\setprocdefaults this is reduced.  If you want finer control use this
%	command.
%   

%%%%%%%%%%%%%%%%%
% Tab-stop template for the tabbing environments below: one indentation
% level is the width of "1234" (four characters).
\newcommand{\algtab}{1234}

\newcommand{\setalgtab}[1]{\renewcommand{\algtab}{#1}}

% Length of the horizontal rules that set algorithms off from the text.
\newcommand{\algseplinelen}{6in}
\newcommand{\setalgseplinelen}[1]{\renewcommand{\algseplinelen}{#1}} 
\newcommand{\algsepline}{\rule{\algseplinelen}{0.1mm}\\}

% Algorithm in sans-serif inside a tabbing environment, without rules.
\newcommand{\ordinalg}[1]{\begin{sf} \crunchbegin \begin{tabbing}
\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\kill
#1 
\end{tabbing} \crunchend \end{sf}}

% As \ordinalg, but with separating rules above and below.
\newcommand{\algorithm}[1]{\begin{sf} \crunchbegin \begin{tabbing}
\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\kill
\raisebox{4ex}[5ex][2mm]{\algsepline}
#1 \\
\raisebox{0ex}[.1mm][.1mm]{\algsepline}
\end{tabbing} \crunchend \end{sf}}

% Separating rules like \algorithm, but no tabbing setup.
\newcommand{\pseudocode}[1]{\begin{sf} \crunchbegin 
\noindent \raisebox{0mm}[.2mm][.2mm]{\algsepline} \\
#1 
\noindent \raisebox{0ex}[.1mm][.1mm]{\algsepline} \\
\crunchend \end{sf}}

% Rules above and below a module program, in the surrounding text font.
\newcommand{\moduleprog}[1]{\noindent 
\raisebox{4ex}[5ex][2mm]{\algsepline}
#1 \\
\raisebox{0ex}[.1mm][.1mm]{\algsepline}
}

%% \newenvironment{namedalgorithm}[1]
%%   {{\bf #1}\\ \nopagebreak\begin{sf}\begin{small}\begin{tabbing}
%% \algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\algtab\=\kill}
%%   {\end{tabbing}\end{small}\end{sf}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%
% See also redefinition of condition in \procdefaults mode
%%%%%%%%%%%%%

% A numbered "Condition" as a description item, ended by \condBox.
% (Redefined to \proccondition by \setprocminordefaults.)
\newcommand{\condition}[2]{\crunchbegin \begin{description}\item [Condition #1]:#2 \condBox \end{description} \crunchend }

% Compact inline forms used in the proceedings (two-column) format.
\newcommand{\proctechnique}[2]{\crunchbegin \noindent {\bf Technique #1}:~#2 \condBox \crunchend }

\newcommand{\proccondition}[2]{\crunchbegin \noindent {\bf Condition #1}:~#2 \condBox \crunchend }

%%%%
% This command ( \technique) is useful to describe a collection of short 
% techniques.  Each of the techniques begins with an \item [<description>] 
% The list of techniques is demarcated by long dividing lines.
%%%%
\newcommand{\technique}[2]{\begin{sf} \crunchbegin
\noindent \raisebox{0mm}[.2mm][.2mm]{\algsepline}
\vspace{-2mm}
\begin{description}
\item [Technique #1:] #2 
\end{description}
\vspace{-2mm}
\noindent \raisebox{0ex}[.1mm][.1mm]{\algsepline} \\
\crunchend \end{sf}}

% As \technique, but the argument supplies the whole \item list itself.
\newcommand{\techniques}[1]{\begin{sf} \crunchbegin
\noindent \raisebox{0mm}[.2mm][.2mm]{\algsepline}
\vspace{-2mm}
\begin{description}
#1 
\end{description}
\vspace{-2mm}
\noindent \raisebox{0ex}[.1mm][.1mm]{\algsepline} \\
\crunchend \end{sf}}

%% The command \itemizes and \enumerates are used as replacements for 
%% \itemize and \enumerate, that save space.
%% Use the command \items within these commands instead of \item
%% The enumerates command currently does not generate numbers.  It should
%% be fixed.
%%

	    \newcommand{\items}{\\ \hspace*{.75cm} $ \bullet $~} 

\newenvironment{itemizes}{}{} 

\newenvironment{enumerates}{ \newcommand{\items}{\\ \hspace*{.75cm}}}{} 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Rule Formatting Section
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   \logicprog{...}:
% 	The command \logicprog creates an inline logic program.
%	It formats the program in a nice manner.
%	Several commands described below can be used to create rules within
%	the program.
%	NOTE:  \\ must be used to create new lines, or latex will complain.
%
%   \logicprogfig{ <caption and optional label> }{...}:
% 	The command \logicprogfig creates a separate figure.  It takes two
% 	arguments - a caption and then the logic program itself within { ... }
%
%   Commands for use within logicprog mode:
%   --------------------------------------
%
%   \lfact {<Rulename>} {<fact>}
%   \lrule {<Rulename>} {<Rulehead>} {<Rulebody>}
%   \lquery {<query>}
%   \lcomment{<C style comment>}
%   \lheader {<Any text that spans the entire line>}
%   \lrulecont {<Rulename>} {<Rulehead>} {<PartialRulebody>}
%   \lcontrule {} 	    {}		 {<Rest of rule body>.}
%
%   \setrulebodywidth{<width>}:
%	Particularly useful when non-standard page width is used.
%
%   Commands for use outside logicprog mode:
%   ---------------------------------------
%    \hornclause {<Rulename>} {<Rulehead>} {<Rulebody>}
%    \lhornclause {<Rulename>} {<Rulehead>} {<Rulebody>} {<RuleLabel>}
%    \inlineclause {<Rulename>} {<Rulehead>} {<Rulebody>}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% Row formats for use inside \logicprog's four-column array
% (rule name | head | arrow | body).
\newcommand{\lheader}[1]{\multicolumn{4}{l}{\mbox{#1}}}
\newcommand{\lcomment}[1]{\multicolumn{4}{l}{\mbox{/* #1 */}}}
\newcommand{\lannotation}[1]{\multicolumn{4}{l}{\mbox{#1}}}
\newcommand{\lrule}[3]{#1&#2&\ifff&$ #3. $}
\newcommand{\lrulecont}[3]{#1&#2&\ifff&$ #3 $}
\newcommand{\lcontrule}[3]{#1&#2&&$ #3$}
\newcommand{\lqrule}[4]{#1&#2&\mbox{$\leftarrow #3 -$}&$ #4. $}
\newcommand{\lfact}[2]{#1&\multicolumn{3}{l}{#2.}}
\newcommand{\lquery}[1]{\multicolumn{4}{l}{\mbox{Query: ?-$ #1 .$}}}
% Free-standing (display-math) and inline Horn clauses, outside logicprog.
\newcommand{\hornclause}[3]{\[ #1~#2 \ifff #3. \]}
\newcommand{\lhornclause}[4]{\[ #1~#2 \ifff #3. #4 \]}
\newcommand{\inlineclause}[3]{$ #1~#2 \ifff #3$}

% Width of the p{} column holding rule bodies; adjust via \setrulebodywidth.
\newcommand{\rulebodywidth}{4.5in}
\newcommand{\setrulebodywidth}[1]{\renewcommand{\rulebodywidth}{#1}}

% Display-math array for an inline logic program; rows are built with the
% \l... commands above and must be separated by \\.
\newcommand{\logicprog}[1]{\[ {\renewcommand{\arraycolsep}{.5mm}\renewcommand{\arraystretch}{.75}\begin{array}{lllp{\rulebodywidth}}#1\end{array}}\]}
%

% As \logicprog, but wrapped in a figure with the first argument as caption;
% the "wide" variant uses figure*, the "c" variant takes an explicit
% placement specifier as its first argument.
\newcommand{\logicprogfig}[2]{\begin{figure}\[ 
	{\renewcommand{\arraycolsep}{.5mm}\renewcommand{\arraystretch}{.75}
	\begin{array}{lllp{\rulebodywidth}}#2\end{array}}\]\vspace{-5mm}
	\caption{#1}\end{figure}}
\newcommand{\widelogicprogfig}[2]{\begin{figure*}\[ 
	{\renewcommand{\arraycolsep}{.5mm}\renewcommand{\arraystretch}{.75}
	\begin{array}{lllp{\rulebodywidth}}#2\end{array}}\]\vspace{-5mm}
	\caption{#1}\end{figure*}}
\newcommand{\logicprogcfig}[3]{\begin{figure}[#1]\[ 
	{\renewcommand{\arraycolsep}{.5mm}\renewcommand{\arraystretch}{.75}
	\begin{array}{lllp{\rulebodywidth}}#3\end{array}}\]\vspace{-5mm}
	\caption{#2}\end{figure}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SETTING PROC OR NORMAL ENVIRONMENT
%   \setnormaldefaults:
%	Set up a number of defaults such as text width/height etc.,
%	for normal format.
%   \setprocdefaults:
%	Set up a number of defaults for two column format.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Defaults for the normal (single-column) format: page layout, display-math
% spacing, list spacing, line spacing, and merged theorem numbering.
\newcommand{\setnormaldefaults}{
% 
% NOTE(review): psfig is long obsolete; use the graphicx package when
% modernizing this file.
\input{psfig}
% page layout
\addtolength{\topmargin}{-0.8in}    % move top margin up
\setlength{\textheight}{8.85in}      % text height
\addtolength{\oddsidemargin}{-0.5in} 
\setlength{\textwidth}{6.25in}
\addtolength{\columnsep}{2.5ex}
% NOTE(review): list environments reset \itemsep/\topsep on entry, so the
% two \addtolength lines below may have no lasting effect -- confirm.
\addtolength{\itemsep}{-1ex}
\addtolength{\topsep}{-1.5ex}
\addtolength{\parskip}{2ex}
\abovedisplayskip=2mm  % Spacing for math line mode formulas (using \[ \])
\belowdisplayskip=2mm
\abovedisplayshortskip=1mm  % As above, for formulas that start beyond the 
			    % last character of previous line
\belowdisplayshortskip=2mm
\topsep=-1mm  % The amount of space in addition to \parskip, above and 
	      % below list envs (eg. enumerate).  Beware of negative spacing 
	      % if parskip is too small
\partopsep=0mm % As above, when there is a blank line preceding the list
\renewcommand{\baselinestretch}{1.1}    % one and a half spacing approx.
\mergednumbering			% Theorems, lemmas etc are numbered 
					% together.
}

% Proceedings-format adjustments that do not change the overall page size:
% tighter rule/algorithm widths, smaller tabs, compact conditions.
\newcommand{\setprocminordefaults}{
\setlength{\topskip}{10pt}
\setlength{\headsep}{0pt}
\setrulebodywidth{2in} \setalgtab{12}
\setalgseplinelen{2.5in}
\setlength{\parskip}{1mm}
\renewcommand{\condition}{\proccondition}
% \crunchspace    % use small font in several environments (to find out in which
		% look for uses of \crunchbegin and \crunchend
\mergednumbering		% Theorems, lemmas etc are numbered 
				% together.
}

% Defaults for the two-column proceedings format: normal defaults first,
% then a wider/taller text block, then the minor adjustments above.
\newcommand{\setprocdefaults}{
\setnormaldefaults
% \renewcommand{\maketitle}{??} - fill this in 
\setlength{\oddsidemargin}{-.20in}
\setlength{\topmargin}{-.40in}  % In order to get .75in separation
\setlength{\textheight}{9.25in}
\setlength{\textwidth} {6.99in}
\setlength{\columnsep} {0.33in}
\setprocminordefaults

% \setlength{\topskip}{10pt}
% \setlength{\headsep}{0pt}
% \setrulebodywidth{2in} \setalgtab{12}
% \setalgseplinelen{2.5in}
% \setlength{\parskip}{1mm}
% \renewcommand{\condition}{\proccondition}
% \crunchspace	% use small font in several environments (to find out in which
		% % look for uses of \crunchbegin and \crunchend
% \mergednumbering		% Theorems, lemmas etc are numbered 
				% % together.
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Apply the normal-format defaults for this document.
\setnormaldefaults

% Re-assert the Prolog-style ":-" rule symbol.
\renewcommand{\ifff}{\mbox{$ :- $}}
% Text destined only for the installation guide; discarded here.
\newcommand{\inguide}[1]{ }
% Implementation notes are typeset as footnotes.
\newcommand{\implnote}[1]{\footnote{#1}}
% Text destined for the paper version; kept as-is.
\newcommand{\inpaper}[1]{#1}

% Discard comments and full-version-only text in this distribution copy.
\renewcommand{\comment}[1]{}
\renewcommand{\fullversion}[1]{}
% Bold run-in heading followed by a colon.
\newcommand{\heading}[1]{\noindent{\bf #1:}}

\author{Raghu Ramakrishnan \and
Praveen Seshadri \and
Divesh Srivastava \and
S. Sudarshan \\
{\em Computer Sciences Department,}\\
{\em University of Wisconsin--Madison, WI 53706, U.S.A.}}

% Suppress the date in the title block.
\date{}

\begin{document}

\title{\bf A User's Introduction to CORAL\thanks
{The authors' e-mail addresses are 
\{raghu,divesh,praveen\}@cs.wisc.edu; sudarsha@att.com.}}

\maketitle

\newpage

\section{Introduction}
\label{chap:intro}
\input{intro.chap}
\newpage

\section{A First Session}
\label{chap:start}
\input{start.chap}
\reminder{discuss things like the coralrc file}
\newpage

\section{Declarative Language Features:  Basics}
\label{chap:declba}
\input{declba.chap}
\newpage

\section{Declarative Language Features:  Negation}
\label{chap:declne}
\input{declne.chap}
\newpage

\section{Declarative Language Features:  Sets and Multisets}
\label{chap:declse}
\input{declse.chap}
\newpage

\section{Declarative Language Features:  Advanced}
\comment{Choice, aggregate selections, head updates, prioritization}
\label{chap:declad}
\input{declad.chap}
\newpage

\section{Modules in CORAL}
\comment{styles, neg/grouping, save modules ...}
\label{chap:modules}
\input{modules.chap}
\newpage

\section{Declarative Language Features:  Annotations and Control}
\label{chap:declac}
\input{declac.chap}
\newpage

\section{CORAL Commands}
\comment{also discuss updates, persistent relations,
multiple workspaces and debugging here}
\label{chap:cmds}
\input{cmds.chap}
\newpage

\section{IO in CORAL}
\label{chap:io}
\input{io.chap}
\reminder{Add something here about IO commands of various kinds.
	Refer to the help feature mostly, but include
	examples of readtable etc.
	}
\newpage


\section{Imperative Modules}
\comment{discuss rule style updates here, as well as built-ins and 
extended c++ mode}
\label{chap:impmod}
\input{impmod.chap}
\newpage

\section{Extensibility in CORAL}
\label{chap:extens}
\input{extens.chap}
\newpage

\section{Current Status}
\label{sec:currstat}
\input{currstat.chap}
\reminder{ things for the next release:
- per-predicate trace/profile; graphical debugging
- derived persistent relations
- multiple <> in rule heads (plus multiple rules with <> in hd, unless handled now)
- improve implem of arith exprs; constraints
- logical expressions, if-then-else
- memory management, esp for persistent relns
- tuple values, named attr notation
- ordered scans, predicates in scans
- array relations
- occur chk, TRO, agg opt from sud thesis
- and cleaning code :-)
}
\newpage

\section{Acknowledgements}

Per Bothner
played a principal role in the implementation of the first prototype
of CORAL.
Also, several other people --- Tarun, Bill, Vish, Chong, Ball, ...

This work was 
supported by a David and Lucile Packard Foundation Fellowship
in Science and Engineering, a Presidential Young Investigator Award,
with matching grants from Digital Equipment Corporation, Tandem and Xerox,
and NSF grant IRI-9011563.


\bibliographystyle{alpha}
\bibliography{lib/bib/moredb,lib/bib/dbimp,lib/bib/clp}

\newpage

\appendix

\section{The CORAL Installation Guide}
\label{chap:install}
\input{install.chap}
\newpage

\section{CORAL/C++ Interface Specifications}
\label{chap:intspec}
\input{intspec.chap}
\newpage

\end{document}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\sections{Introduction}
\label{sec:intro}

CORAL\footnote{The CORAL project was initiated in 88-89 --- under the name 
Conlog --- and an initial overview 
was presented in \cite{rbss90:coral}.
CORAL stands for ``COntrol, Relations And Logic''.}
is a database programming language being developed at the University
of Wisconsin--Madison.
It seeks to combine features of a database query language, such as
efficient treatment of large relations, aggregate
operations and declarative semantics,
with those of a logic programming language, such as 
more powerful inference capabilities and support for 
incomplete and structured data.
CORAL significantly extends the expressiveness of standard
database query languages such as SQL, and differs from logic
programming languages such as Prolog in supporting a modular
and non-operational semantics.
Applications in which large amounts of data must be extensively
analyzed are likely to benefit from this combination of features.
In particular, CORAL is attractive for
sequence data analysis, natural language processing,
temporal queries, and bill-of-materials and other transitive closure
related applications.  

A CORAL program is a collection of {\em modules}: at most
one {\em imperative} module and any number 
of {\em declarative} and {\em command} modules.
Modules can be compiled separately.


\section {Declarative Language Features:  Background}
\label {sec:background}

We present the basic ideas behind Horn clause logic programs
and their declarative semantics in this section.
These concepts form the basis for the declarative features
of CORAL.
 
\subsection{Terms, Facts and the Database}
\label{sec:database}
 
CORAL supports several types of atomic objects such as
arbitrary precision integers,
floating point values, strings, etc.%
\footnote{The implementation uses the $bignum$ package from
DEC's Paris Research Lab.}
Some examples: {\em 1, 2, 5.67, john, "mary had a little lamb" }.
As in logic programming, variables in CORAL are strings that 
start with an uppercase letter.
Examples of variables include $ X, X1, Date, Name $.
Formally, a variable is denoted by a sequence of letters, numbers or
``\_'', starting with an uppercase letter.
Since we allow nested terms, variables can occur within 
terms, and not just as arguments of a fact.
A {\em ground value} is one that does not contain any variables.

Strings in the declarative language are either sequences of letters and
numerals (starting with a lowercase letter), or they may be quoted, in 
which case they 
can contain arbitrary characters (a $ " $ is treated as the end of the string,
and $ \backslash " $ is interpreted as a character $ " $ within the string). 

CORAL also supports complex objects through the use of {\em functors}
to create nested tuples.
For instance, one can create a value $ address(number, street, city, state, 
zipcode, country) $, and use it as an argument of a tuple.
The nesting can be of arbitrary depth, and 
this permits one to create nested data structures.
For instance, trees can be created using a 
functor $ node(data, leftchild, rightchild) $ where $ leftchild $ and 
$ rightchild $ could be either $ null $ or could be nested structures 
created using $ node $.
Lists of values can also be created in this fashion;
following logic programming systems, CORAL provides special syntax
for creation of lists.
Thus $ [1,2,3] $ denotes a list containing three elements, $ 1, 2 $ and $ 3 $
in that order.
Elements of lists can, of course, be nested structures themselves.
The syntax $ [1 | [2,3]] $ denotes a list with first element $ 1 $ and with
the tail being the list $ [2,3] $. 
This is exactly equivalent to the list $ [1,2,3] $, but can be used to 
attach an element to the head of an existing list, or conversely to 
split a list into its head and tail.
 
An {\em atom} or {\em atomic literal} consists of a predicate/relation
name, with a tuple of arguments.
Examples are $ edge(1,2)$ and $ ancestor(X,Y) $.
The term {\em fact} is often used to denote an atom.

\subsection {Rules}
\label{sec:rules}
 
Horn clause rules consist of a {\em head} and a {\em body}.
The body is a conjunction of {\em literals}, each of which is either positive
or negative.  The head is a positive literal.  Informally,
a rule is to be read as an assertion that for all assignments of terms
to the variables that appear in the rule, the head is true if the body is true.
A {\em fact} is a rule with an empty body.
In the deductive database literature, it is common to distinguish a
set of facts as the EDB or {\em extensional database}, and to refer to
the set of rules as the IDB or the {\em intensional database}.
The significance of the distinction lies in the fact that at compile time,
only the IDB is examined; the EDB is viewed as an input. 

Consider a rule 
\hornclause{}{ancestor(X,Y)}{parent(X,Z), ancestor(Z,Y)}
and suppose we have the facts $ parent(1,4) $ and $ ancestor(4,5)$.
We ``unify'' $ parent(1,4) $ with the first literal of the rule,
by setting $ X \leftarrow 1, Z \leftarrow 4 $.
Now we can further unify $ ancestor(4,5) $ with the second literal of the
rule by setting $ Y \leftarrow 5 $ ($ Z $ has already been assigned a value).
Since all the literals in the rule have been unified with facts, we can
now derive the head fact $ ancestor(1,5) $ (this is given by the
assignment of values to the variables).

Rules with negative body literals and/or set-grouping in the head are
discussed in Sections \ref{sec:neg} and \ref{sec:sets}.

\subsection{Semantics}

The meaning of a collection of Horn clause rules is given by their
least model.  For the purpose of assigning semantics to a program,
the distinction between EDB and IDB is irrelevant; the EDB is just
a collection of simple rules, and is considered to be part of the
program along with the IDB rules.  Operationally, every fact
--- rule with empty body --- can be viewed as an assertion that is
unconditionally true. By using facts that have been established
to be true to instantiate literals in the body of a rule,
we can establish that the (instantiated) head fact is also true.
If every rule in a program is applied thus in all possible ways
to a set of facts, we can view a program as an operator that
can be applied to generate new facts; viewed thus, the meaning
of a program can also be taken to be the least fixpoint of this
operator.  That is, a program denotes a minimal set of facts such that
all rules with empty bodies are included, and further, no application
of a rule yields a new fact.  It is well known that the least
fixpoint and least model semantics are equivalent for the case
of Horn clause logic programs \cite{lloyd:logic:pgm}.

\sections{Advanced Declarative Language Features}
\label{sec:declar}

In this section, we discuss more advanced features of the 
declarative language.  In particular,
we discuss non-ground facts, and rules with negation and
set-generation.  CORAL supports a significantly larger class
of programs with negation and set-generation than other 
deductive database systems. 
It is also the {\em only} deductive database system that
supports non-ground facts.

\subsections{Non-Ground Facts}
\label{sec:ngterms}

Unlike Aditi (\cite{vrkss:aditi}), EKS-V1 (\cite{vbkl:eksv1:short}), 
LDL (\cite{nt:ldl:bk,ldl:proto}), Glue-NAIL! (\cite{muv:des,pdr91:glue})
and Starburst SQL (\cite{mpr90:aggregates}), 
which restrict the facts in a database to be ground, 
CORAL permits variables within facts.
As an example, consider Figure~\ref{fig:prog:dumb}. 
It is possible to query $ append $ as follows:
\logicprog{
\lquery{append([1,2,3,4,X],[Y,Z], ANS)}
}
and get the answer (a fact with variables in it)
\logicprog{
\lfact {} {ANS = [1,2,3,4,X,Y,Z]}
}
The interpretation of a variable in a fact is that the fact is true for 
every possible replacement of each variable by a ground term. 
Thus a fact with a variable in it represents a possibly infinite database.
Such facts are often useful in knowledge representation, and could be 
particularly useful in a database that stores (and possibly manipulates)
rules.
There is another, possibly more important use of variables---namely to 
specify constraint facts; we do not discuss this issue here.

Since we allow non-ground facts, 
we do not require rules to be range-restricted. 
Non-ground facts in the database are a special case of non-range-restricted
rules where the body is empty.

\subsections{Negation}
\label{sec:neg}

The keyword {\sf not} is used as a prefix to indicate a negated body literal.
For instance, given a predicate $ parent $, we can test if $ a $ is not a
parent of $ b $ by using $ {\sf not~} parent(a,b) $.
Such a literal can be used in a query, or in the body of a rule.

CORAL supports a class of programs with
negation that properly contains the class of {\em non-floundering 
left-to-right modularly stratified programs} ~(\cite{r90:modstrat}).
A program is non-floundering if all variables in a negated literal are
ground before the literal is evaluated (in the left-to-right rule 
order).
\footnote{
We discuss general sip orders that can be used instead of
the left-to-right rule order, in Section~\ref{sec:sips}.}
Intuitively, a modularly stratified program is such that in the answers and
subgoals generated for the program, there should be no cycles through 
negation.
This class of programs (properly) includes the class of programs with 
{\em locally stratified} negation.

The following example from \cite{r90:modstrat} illustrates the use of 
modularly stratified negation in a program.
Suppose we have a complex mechanism constructed out of a number
of components that may themselves be constructed from smaller components.
Let the component-of relationship be expressed in the
relation $ part $.
A component is known to be working either if it has been (successfully) 
tested or if it is constructed from smaller components, and all the
smaller components are known to be working.
This is expressed by the following program.
\logicprog{
\lrule{} {working(X)} {tested(X)} \\
\lrulecont{} {working(X)} {part(X,Y),}\\
\lheader{ \hspace*{0.75cm} $ {\sf not~} has\_suspect\_part(X) $.} \\
\lrule{} {has\_suspect\_part(X)} {part(X,Y), {\sf not~} working(Y)}
}
Note that the predicate $ working $ is defined negatively in terms of itself.
However, the $ part $ relation is acyclic, and hence the $ working $ status
of a component is defined negatively in terms of subcomponents, but not
negatively in terms of itself.
CORAL provides an evaluation mechanism called Ordered Search 
\cite{rss92:ord:search} that evaluates programs with left-to-right modularly 
stratified negation efficiently.


\subsections{Creating Sets and Multisets}
\label{sec:sets}

Sets and multisets are allowed as values in CORAL;
$ \{ 1, 2, 3, f(a,b), a \} $ is an example of a set,
$\{ 1, f(a), f(a) \}$ is an example of a multiset.
Sets and multisets can 
contain arbitrary values as elements, and can themselves be used
as arguments to functors (lists, sets or multisets).
General matching or unification of sets (where one or both of the sets,
respectively, may contain variables) is not supported.
Although LDL supports set matching,
we believe that most, if not all, uses of set matching as in LDL can be
implemented naturally using the suite of functions that we provide on sets.
Since we allow arbitrarily nested structures, we must define what 
the domain (or universe) of discourse is.
In this we follow LDL, whose treatment of the universe is an extension of
the {\em Herbrand universe} that is used as a standard in logic programming.
The extended Herbrand universe is described in \cite{bnst:sets:LDL}.

There are two ways in which sets and multisets can be created using
rules, namely, set-enumeration ($\{~\}$) and set-grouping ($<>$), as in 
LDL.  Actually, these operators are defined to create multisets in
CORAL, with the cardinality of an element being the number of times
it appears in an enumerated term and the number of derivations in
a term generated with grouping.
The operations CORAL permits on sets are different from those 
supported by LDL, and we discuss these differences after describing
the support for sets and multisets in CORAL.

The first way of constructing multisets is called {\em multiset-enumeration}.
The following rule (fact) illustrates this construct.
\logicprog{
\lfact{}{children(john,\{mary, peter, peter, paul\})}
}
We require that 
all elements of a multiset must be ground terms 
(i.e., not contain any variables).
For instance, if the following rule is used to create a multiset,
$ X $ must be bound to a ground value in the rule body.
\hornclause{}{setchild(john,\{X\})}{child(john,X)}
Such enumerated multiset terms may be used in the bodies of rules also,
but as before, all variables in the term must be  bound to ground
values before the term is encountered (in the sip order) in the body.
 
The second way of constructing a multiset is called {\em multiset-grouping}.
This construct is more powerful than multiset-enumeration.
It is also restricted to generate ground facts.  Indeed,
the restrictions are more stringent than for multiset-enumeration:
every variable that appears in the head of a rule with multiset-grouping
must be bound to a ground term in the generation of a fact.
The following rule illustrates the use of multiset-grouping:
\logicprog{
\lrule {}{p(X,<Y>)}{q(X,Y,Z)}
}
This rule uses facts for $ q $ to generate a multiset $ S $ of
instantiations for the variables $ X, Y$, and $ Z $.
For each value $ x $ for $ X $ in this set it creates a fact 
$ p( x, \pi_Y \sigma_{X=x} S) $, where $\pi_Y$ is a multiset projection
(i.e., it does not do duplicate elimination).
Thus with facts $ q(1,2,3), q(1,2,5) $ and $ q(1,3,4) $ we get the fact 
$ p(1,\{2,2,3\}) $.

The use of the set-grouping construct in CORAL is 
similar to (but not exactly the same as) the grouping construct in LDL---
grouping in CORAL is defined to construct a multiset, whereas 
grouping creates a set in LDL.
We can always obtain a set from a multiset using the $ set $ operator.
In fact, with the following rule, the evaluation is optimized to 
create a set directly, rather than to first create a multiset and then 
perform duplicate elimination to convert it to a set.
\logicprog{
\lrule {}{p(X,set(<Y>))}{q(X,Y,Z)}
}
In several programs, the
number of copies of an element is important, and the support for
multiset semantics permits simple solutions.  For example, to
obtain the amount spent on employee salaries, the salary column
can be projected out and grouped to generate the multiset of salaries,
and then summed up.  The projection and grouping in LDL yield
a set of salaries, and if several employees have the same salary,
the total amount spent on salaries is hard to compute.

We require that the use of the multiset-grouping operator be left-to-right
modularly-stratified (in the same way as negation).
This ensures that all derivable $ q $ facts with a given value 
$ x $ for $ X $ can be computed before a fact $ p(x,\_) $ is created.
While there are some intuitively clear programs that do not meet
this restriction, the current implementation cannot deal with them
properly.\footnote{
LDL imposes the more stringent restriction that uses of grouping be stratified.
We note that while EKS-V1 does not support set-generation through grouping,
it does support set-grouping in conjunction with aggregate operations
such as {\sf count, min} and {\sf sum}.  Indeed, EKS-V1 allows recursion
through uses of aggregation.
}

\subsections{Operations on Sets}
\label{sec:set:ops}

We provide several standard operations on sets and multisets as
built-in predicates.
These include $ {\sf member} $, $ {\sf union} $, $ {\sf intersection} $,
$ {\sf difference} $,
$ {\sf multisetunion} $, $ {\sf cardinality} $, $ {\sf subset} $, 
and $ {\sf set} $.  
The multiset versions of these operations are carefully chosen to
preserve the intuitive semantics of multisets.
For reasons of efficiency,
most of these are restricted to testing, and will not permit generation --- 
for example, the $ {\sf subset} $ predicate cannot be used to generate
subsets of a set, but can be used to test if a given set is a subset of 
another.
The predicate $ {\sf member} $ is an exception in that it can be used to 
generate the members of a given set.

The following important restriction in CORAL enables efficient implementation:
A multiset term is restricted to be ground (as in LDL)
and to match only another (identical) ground multiset term or a
variable.\footnote{
LDL permits non-ground set-terms to appear in rule literals, and
the LDL implementation generates a number of rules at
compile time that is exponential in the size of the largest set-term
in the program text.  
All the LDL rules with set terms that we have seen are easily translated into
CORAL rules.  
}

We allow several aggregate operations to be used on sets and multisets. 
The list of aggregate operators we support includes $ {\sf count}, 
{\sf min}, \lbok {\sf max},\lbok {\sf sum}, {\sf product}, {\sf average} $
and $ {\sf any} $.
Some of the aggregate operations can be combined directly with the 
set-generation operations for increased efficiency.
For instance, the evaluation of the following rule is optimized to store
only the maximum value during the evaluation of the rule, instead 
of generating a multiset and then selecting the maximum value.
{\setrulebodywidth{3.15in}
\logicprog{
\lrulecont {}{maxgrade(Class,{\sf max}(<Grade>))}{}\\
\lheader{ \hspace*{.75cm} $ student(S,Class), grade(S,Grade). $}
}}
This optimization is also performed for {\sf count, min, sum} and 
{\sf product}.

{\setrulebodywidth{3.15in}
\widelogicprogfig{\label{fig:shortest} Program Shortest\_Path} {
\lheader{{\sf module} $ shortest\_path $.} \\
\lheader{{\sf export} $ shortest\_path(bfff,ffff) $.} \\
\lrule{} {shortest\_path(X,Y,P,C)} {s\_p\_length(X,Y,C),
	path(X,Y,P,C)}\\
\lrule{} {s\_p\_length(X,Y,min(<C>) )} {path(X,Y,P,C)} \\
\lrulecont{}{path(X,Y, P1,C1)} {path(X,Z,P,C), edge(Z,Y,EC), } \\
\lcontrule{} {} {append([edge(Z,Y)], P, P1), C1 = C + EC. } \\
\lrule{}{path(X,Y,[edge(X,Y)],C)}{edge(X,Y,C)} \\
\lheader{\sf end\_module.} \\
}
}

The program in Figure~\ref{fig:shortest} illustrates how to use aggregation
to find shortest paths in a graph with edge weights.
(The program as written is not efficient, and may loop forever;
in Section~\ref{sec:aggsel}
we describe how annotations may be used to get an efficient version of the
program.)
This program can be used, for example, to compute cheapest flights.
The use of more complicated combinations of grouping and aggregation in 
CORAL is illustrated below.
{\setrulebodywidth{3.15in}
\logicprog{
\lrulecont {}{numofemps(M,{\sf count}({\sf set}(<E>)))}{}\\
\lheader{ \hspace*{.75cm} $ worksfor(E,M).$} 
}}
This results in one tuple per manager
with the second argument as the number of distinct employees working 
under her.
The following example illustrates the use of
${\sf member}$ to generate the elements of a set.
\logicprog{
\lrulecont{}{ok\_team(S)}{old\_team(S), {\sf count}(S,C), C \leq 3,}\\
\lheader{\hspace*{.75cm} $ {\sf member}(X,S),{\sf member}(Y,S), 
		{\sf member}(Z,S), $}\\
\lheader{\hspace*{.75cm} $ engineer(X), pilot(Y), doctor(Z). $}
}
Each tuple in $old\_team$ consists of a set of people.  An $ok\_team$
tuple additionally must contain an engineer,
a pilot and a doctor.  Note that a team containing a single member
who is an engineer, a pilot and a doctor would qualify as an $ok\_team$.
This program is a translation into CORAL
of an LDL program from \cite{stz:set:compile}; the semantics of the original
LDL program required that a team contain at most three members.
The addition of ${\sf count}(S,C), C \leq 3$ to the body of the rule
ensures this.

\subsections{Persistent Relations}
\label{sec:persistent:rels}

The schema of a persistent relation must be declared, e.g.,
$schema(employee(string, int, float, string))$.  Currently,
tuples in a persistent relation are restricted to have fields of
type string, int or float.
Except for the points noted below (in Section~\ref{sec:mult:dbs}),
a persistent relation behaves
just the same as a non-persistent relation.  Indices can be
declared, and are implemented as B+ tree indices.
Derived and base relations, and also relations in the rewritten
programs, can be made persistent.
\footnote{The current implementation supports only persistent base relations.}

CORAL uses the EXODUS storage manager to support persistent relations.
EXODUS uses a client-server architecture; CORAL is the client process,
and maintains buffers for persistent relations.  If a requested
tuple is not in a local buffer, a request is forwarded to the
EXODUS server and the page with the requested tuple is retrieved.
In the current implementation, the tuple is copied from the local buffer
into the CORAL space.  This is adequate when queries do not examine
very large subsets of persistent relations, but is likely to cause
problems otherwise.  We are investigating alternative techniques
wherein local copies of requested tuples are not created.

\subsections{Multiple Databases}
\label{sec:mult:dbs}

A {\em database} is a collection of relations, which can be either
explicitly enumerated ``base'' relations or relations exported by
a module. It is useful to think of a database as a workspace or
environment.  A user can have several named databases, copy
relations between two databases (or simply make a relation in
one database visible from another without copying), update
relations in a database, or run queries against a database.
It is also possible to save a database in a file between
executions.

Persistent relations exist in a database called ``db\_rels'',
and can be made visible to other databases without copying.
When a database that refers to a persistent relation is saved,
only the name of the persistent relation---and not
its current set of tuples---is saved.

\section{Modules in CORAL}
\label{sec:modules}

CORAL provides a powerful module mechanism.  As mentioned earlier,
there are three kinds of modules, declarative, imperative and 
command modules.  All modules, however, present a uniform interface:
one or more query forms (predicates with binding patterns) are
exported by each module, and an exported predicate can be viewed
externally essentially as an EDB relation.  There is always a
{\em current database}, and when a module is consulted, i.e., opened,
the exported predicates are added to the list of relations in the
current database.  The fact that these predicates are defined by
a program rather than an explicit collection of tuples is transparent.

Three goals govern the design of modules in CORAL:
\begin{enumerate}
\item
The meaning of a module should depend only upon the rules in the
module and upon modules that export predicates used in it.
\item
Each module should be compiled separately, and should be re-compiled only
if there is some change to it.
\item
Modules should present a uniform interface similar to the interface
to a relation.
\end{enumerate}

We introduce modules in CORAL through a simple example.

\subsections{An Example}
\label{sec:decmodules}

The program in Figure~\ref{fig:prog:dumb} 
illustrates declarative modules in CORAL.
The program computes paths, with lists used to maintain the
sequence of nodes on a path.\footnote{
The use of $append$ is for illustrative purposes.  We can use $cons$ 
instead to get the edges in the reverse order.}

{\setrulebodywidth{3.15in}
\widelogicprogfig{\label{fig:prog:dumb} Program Path} {
\lheader{{\sf module} $ path $.} \\
\lheader{{\sf export} $ path(bfff,ffff) $.} \\
\lrulecont{}{path(X,Y, P1,C1)} {path(X,Z,P,C), edge(Z,Y,EC), } \\
\lcontrule{} {} {append(P, [edge(Z,Y)], P1), C1 = C + EC. } \\
\lrule{}{path(X,Y,[edge(X,Y)],C)}{edge(X,Y,C)} \\
\lheader{\sf end\_module.} \\
\\
\lheader{{\sf module} $ Listroutines $.} \\
\lheader{{\sf export} $ append $ ($ bbf, bfb, fbb $).} \\
\lfact{}{append ([~], L, L)} \\
\lrule{}{append ([H \mid T], L, [H \mid L1])}{append (T, L, L1)} \\
\lheader{{\sf end\_module}.}
}
}

The module definition permits a subset of the defined predicates to be named
as exported predicates, and other modules can pose queries
over these predicates.  
The query forms permitted for each exported predicate are also indicated 
in the {\sf export} declaration.
For instance, the $ path $ module exports 
the predicate $ path $ (queries on this predicate that have the 
last three arguments free and the first argument either bound or free are
permitted), and
the $ Listroutines $ module in the above program
exports the $ append $ predicate (in three
permissible query forms---each corresponding to one of the three arguments
being free and the other two bound).
The syntax for exported predicates in a module $ M $ is:
\begin{quote}
{\sf export} {\tt <pred> [ (<adorn-list>) ], [\ldots] }
\end{quote}
Note that queries that do not match the given form can be posed, but the
CORAL system might use an inefficient technique to evaluate such queries,
or might run into problems with predicates (such as arithmetic predicates)
that require certain arguments to be ground.

For (pure) declarative modules, CORAL evaluation (with occur checks)
is guaranteed to be sound,
i.e., if the system returns a fact as an answer to a query, that fact indeed
follows from the semantics of the declarative program.
The evaluation is also ``complete'' in a limited sense --- as long as the
execution terminates,
all answers to a query are actually generated.
It is possible however, to write queries that do not terminate; in some
such cases (e.g., programs without negation or set-grouping)
CORAL is still complete in that it enumerates all answers in the limit.
(Of course, the use of choice, updates,
aggregate selections and the absence of 
occur checks can result in incomplete or even unsound evaluation.  
These features should therefore be used with some care.)

If module $ M $ uses a predicate defined in module $ N $, say $p$,
$ M $ is said to {\em depend-on} $ N $.
(If two modules $ N1 $ and $ N2 $ both export a predicate with the 
same name there is a naming conflict.)
During the evaluation of $ M $, $ p $ may be invoked several times.
CORAL processes this as follows.
While applying a rule in $ M $ containing an occurrence $ p ( ) $,
the partially instantiated literal $ p ( ) $ is 
used to generate a goal on module $N$.
The answers to this query are used iteratively in rule $r$;
each time a new answer to the query is required, rule $r$ requests
a new tuple from the interface to module $N$.

In order to be able to obtain the complete set of answers to this goal without
invoking module $ M $ recursively, we require that the depends-on graph for
{\em inter-module calls} be acyclic.\footnote{The restriction that calls
across modules be acyclic is related to completeness issues in
our (bottom-up) evaluation strategy. While all the rules within a
module are iterated until a fixpoint is reached in response to a goal,
there is no ``outer'' iteration over the set of (the rules in) all modules.}
A simple sufficient condition
to ensure that the depends-on graph for inter-module calls is acyclic
is that the depends-on graph for modules be acyclic.

The interface to relations exported by a module makes no assumptions 
about the evaluation of the module.
In the discussion above,
module $ N $ may contain only base predicates, or may have rules that are
evaluated in any of several different ways.
The module may choose to cache answers between calls, or choose to recompute
answers.
All this is transparent to the calling module.
Similarly, the evaluation of the called module $ N $ makes no
assumptions about the evaluation of the calling module $M$.\footnote{
The basic internal interface to an exported predicate is the 
$ get\_next\_tuple(TupleIterator) $ member function of Relations.
The query pattern is specified in the TupleIterator structure, and 
successive calls to $ get\_next\_tuple $ return successive tuples that
match the specified pattern (the state of computation of module $ N $ is
stored in a field of the TupleIterator structure in between calls to
$ get\_next\_tuple $).
}

This orthogonality permits the free mixing of different evaluation
techniques in different modules in CORAL and is central to how 
different executions in different modules are combined cleanly.
This issue is discussed in the next section.

\sections{Controlling the Evaluation of Declarative Modules}
\label{sec:eval}

Several optimization techniques have been proposed for evaluating
declarative modules.
Although details are beyond the scope of this paper ---
\cite{rss91:bupeval} provides an overview of these techniques ---
we briefly list those that are supported in CORAL and indicate
how the user can choose from these techniques to tailor execution.
{\em By default, CORAL chooses a combination of optimizations,
and the user can choose to specify no annotations whatsoever.}
However, other combinations might work better for some queries,
and the expert user can choose an appropriate combination using
annotations on a per-module basis.  This ability to
control the evaluation through high-level hints is one of the
central features of CORAL, and we present the main ideas in this section.

We have organized the presentation as follows.
First, we discuss the evaluation options that are controlled at the
module level, that is, options that govern the evaluation of an
entire module.  These include the choice of rewriting methods,
materialization versus pipelining, and some annotations that influence
the order in which inferences are made.  
Next, we discuss options that are specified on a per-predicate basis.
These include specification of indices, aggregate selections, and
{\em choice} declarations. 
Finally, we present options that are specified on a per-rule basis.
These include the specification of sips and join orders and
a simple update command that is really a directive to discard a fact.
We caution that some of the annotations can result in an evaluation that does not
conform to the logical reading of the clauses, and should therefore
be used with care.  

In the following sections,
after presenting each of the options available to the user, we
conclude by indicating the default, and by indicating interactions, if
any, with other options.

\subsections{Module Level Control}
\label{sec:modeval}

\subsubsection{Rewriting Methods}

Several optimizing program transformations have been described in the
literature.  These include transformations such as
Magic Templates and Supplementary Magic Templates
(\cite{br:ont,r:mag}), which are designed to restrict a bottom-up
fixpoint computation in a way that is similar to a top-down
query-driven execution.  The Supplementary variation essentially
avoids some recomputation at the cost of additional memoing of
intermediate relations in the evaluation of rule bodies.
Both techniques are applicable to all programs; they
are influenced by the choice of {\em sips} (see Section \ref{sec:sips}).
Context factoring
(\cite{nrsu:ari,nrsu:sigmod,krs90:context}) is a transformation
that is less generally applicable than Magic and Supplementary
Templates, but is likely to yield significant speedups in
many cases when it is applicable.
CORAL supports all three transformations; the user
can choose any one or specify that none of them is to be used.
The above transformations are only applicable in conjunction with
{\em materialized evaluation} (see Section \ref{sec:mat:pipe}).

CORAL also supports
Existential Query Optimization (\cite{rbk:opt}), 
which projects out unnecessary columns from a relation and thereby
reduces the arity of relations.
This can be done in conjunction with materialized evaluation,
in addition to one of the three other transformations, 
and also in conjunction with {\em pipelined evaluation}
(Section \ref{sec:mat:pipe}).

 
Magic Templates rewriting could potentially generate several different 
adorned forms of each predicate, corresponding to the sets of 
arguments that are bound on different ``calls'' to the predicate.
(See \cite{br:ont} for more details on adornments.)
The user can specify that only certain adornments are allowed for a predicate.
If no adornment is specified for a predicate it is assumed that all 
adornments are allowed for the predicate.
The syntax for specifying allowed adornments is shown in Table~\ref{tab:annos}.
Each of $ <adorn1> $, $ <adorn2>, \ldots $ is a string composed of
the characters `f' and `b' corresponding to the arguments of the 
predicate.\footnote{As before, `f' denotes that bindings should not 
be passed for the corresponding arguments and `b' denotes that bindings 
should be passed.}
If an adornment is generated for a predicate during the rewriting, it 
is accepted if it is in the allowed adornment list for that predicate.
Otherwise, if a less bound allowed adornment is found, it is used instead.
If no such allowed adornment is found, the adornment algorithm 
signals an error.

{\em Default:  Supplementary Magic Templates with left-to-right
sips is the only transformation that is applied in conjunction with
materialized evaluation; no transformation is applied in conjunction
with pipelined evaluation.}

{\em Interactions:  Magic, Supplementary Magic and Context Factoring
are mutually exclusive.  Further, they can only be used in conjunction
with materialized execution.  Existential Query transformation can be
done in addition to any of the other three, and can be used
in conjunction with both materialization and pipelining.\\
(Also see: Ordered Search, sips)}
 
%%%%%%%%%%%%%
%Facts generated using a rule share 
%subterms with facts used to derive it; persistent versioning is
%used to ensure that the correct semantics is preserved even when
%non-ground structures are shared \cite{sr:nonground92}.
%This speeds up derivations considerably when using large terms such as
%lists.
%We have implemented hash-consing (\cite{goto74:hashcons,sg76:hashcons}),
%which allows fast unification of large ground terms (eg.\ ground lists
%or multisets).
%%%%%%%%%%%%%%%%

\subsubsection{Materialization Vs. Pipelining}
\label{sec:mat:pipe}

Consider the following rules: 
\logicprog{
\lrule{} {r(X,Y)} {p(X,Z), q(Z,Y)}\\
\lrule{} {p(X,Y)} {p1(X,Z), p2(Z,Y)}
}
Materialized evaluation creates a relation for $p$ and stores the
generated tuples, whereas pipelined evaluation simply
generates the $p$ tuples and joins them with $q$ tuples.
The two approaches complement each other.  If $p$ is used
many times, the cost of materialization is outweighed by
the savings in avoiding recomputation.  On the other hand,
pipelining can be done very efficiently, and unless subqueries
on $p$ are indeed set up multiple times, the cost of storing the
$p$ tuples is avoided.

CORAL supports both materialization and pipelining.  An
interesting aspect of pipelining in CORAL is the treatment of
recursive predicates.  A subquery on the recursive predicate
is solved by a recursive invocation of the same module,
and each invocation pipelines the local results.  The
resulting computation is close to the evaluation strategy
of a top-down implementation such as Prolog.  (Of course,
pipelined evaluation of recursive modules carries the
risks of potential incompleteness associated with a depth-first
strategy, and should be used
with care.)

CORAL also supports a variant of materialized evaluation
described below.

\heading { Ordered Search}
\label{sec:ord:search}

Ordered Search is an evaluation mechanism that orders the use of 
generated subgoals in a program.
Subgoals and answers to subgoals are generated asynchronously, as
in the regular bottom-up evaluation of the magic program.
The order in which generated subgoals are made available for use is somewhat
similar to a top-down evaluation.
This is achieved by maintaining a ``context'' that stores subgoals
in an ordered fashion, and at each stage in the evaluation decides which
subgoal to make available next for use.

We do not present full details of Ordered Search here, but refer the 
reader to \cite{rss92:ord:search}.
However, we note some of the advantages of Ordered Search here:
(1) it maintains information about dependencies between subgoals, which
can be used to evaluate a large class of programs with negation, and 
(2) it provides an ordering to the computation that 
hides subgoals;  when a single answer to the query is all that is needed,
there may be many subgoals that are still hidden when an answer to the
query is found, and the computation can terminate without ever using 
the subgoals; thus we may be able to prevent a lot of redundant 
computation from taking place.

The following example illustrates how Ordered Search works.
\logicprog{
\lrule{}{anc(X,Y)} {edge(X,Y)}\\
\lrule{}{anc(X,Y)} {edge(X,Z),anc(Z,Y)}\\
\lrule{}{query(a,b)}{anc(a,b)}\\
\lfact{}{@{\sf choice}~~ query(X,Y)()(X,Y)}\\
\lfact{}{edge(1,2)}\\
\lfact{}{edge(2,3)}\\
\lfact{}{edge(2,4)}\\
\lfact{}{edge(4,5)}\\
\lfact{}{edge(5,6)}\\
\lquery{query(a,b)}
}

Consider the following program and query, and suppose that only
one answer is desired for the query.
\logicprog{
\lrule {r1:} {path(X,Y,[X,Y])} {edge(X,Y)}\\
\lrule {r2:} {path(X,Y,[X|P])} {edge(X,Z),path(Z,Y,P)} \\
\lfact {f1:} {edge(1,2)}\\
\lfact {f2:} {edge(1,3)}\\
\lfact {f3:} {edge(2,4)}\\
\lfact {f4:} {edge(3,4)}\\
\lquery {path(1,4,X)}
}

Bottom-up evaluation of the magic program with an extra termination condition
(stop on generating an answer) generates $ 7 $ facts when 
evaluating this program.
Ordered Search generates only $ 5 $ facts.
We can easily extend this example to make ordered evaluation perform
arbitrarily better than bottom-up evaluation.
The converse is also possible, and therefore care should be exercised in
using this feature.
 
Ordered Search provides an important evaluation strategy for 
modularly stratified programs (with respect to negation and
multiset-generation).  Without Ordered Search, evaluation proceeds
by setting up inter-module calls whenever a subgoal that
has to be completely solved is encountered.  This results in
computation of such subgoals proceeding independently, with
no sharing of subcomputations.  
While this might be desired
in some situations, it could result in considerable repeated
computation in general.  Ordered Search allows us to evaluate
modularly stratified programs without inter-module calls;
thus, the subcomputations are shared, thereby eliminating
repeated derivations.
The comparison to the implementation of recursive aggregates in
EKS-V1 is 
very similar to the comparison presented in \cite{rss92:ord:search}
between Ordered Search and Ross' technique for evaluating modularly 
stratified programs with negation.
In summary, Ordered Search is asymptotically more efficient for programs
computing only ground facts.

{\em Default:  Materialization is used by default, unless the module
contains set-grouping or negation.  In this case, the default
strategy is Ordered Search.}

\reminder{is pipelining now the default in non-rec modules?}

{\em Interactions:  Several rewriting methods,
the Prioritize annotation, aggregate selections, join orders,
sips and Save Module cannot be specified in conjunction with
pipelining.  The use of Ordered Search requires Magic or
Supplementary Magic rewriting.

Set-grouping is not supported in conjunction with pipelining.  Negation is
supported, but the semantics is negation-by-failure, as in Prolog.}

\subsubsection{Duplicate Checks}
\label{sec:moddup}

By default, duplicate elimination is performed when inserting facts 
into a relation,
so that a relation with only ground tuples consists of a set of 
facts.\footnote{If facts contain variables, duplicate elimination requires 
subsumption checking.
A newly generated fact $n$ is  compared with each existing fact, and
is added to the relation only if $n$ is not subsumed by any existing fact.
On the other hand, it is possible that $n$ subsumes some existing fact $e$;
this is not checked.  Even if such a situation is detected, discarding
an existing fact is complicated in a structure-sharing implementation.
}
An annotation {\sf allow\_duplicates} tells the system to not perform 
duplicate checks for any predicate in the module.
A {\em multiset} annotation ensures that CORAL checks 
{\em only} the magic predicates for duplicates.
\footnote{
For a restricted class of programs, this ensures that all relations
are multisets in which the cardinality of each tuple is equal
to the number of derivation trees for it in the original program.
In particular, the program must be range-restricted, and if a variable
appears twice in a body literal, or inside a structured argument, it
must also appear before that literal in the sip order.}

{\em Default:  Duplicate checks are performed on all predicates (in the
rewritten program, if a transformation is applied) with materialized evaluation.}

{\em Interactions:  Any form of
duplicate checking is incompatible with pipelined evaluation.}

\subsubsections{Prioritizing Facts in Semi-naive Evaluation}
\label{sec:order}

The use of facts computed during bottom-up evaluation can be prioritized.
Consider the shortest path program from Figure~\ref{fig:shortest}, that uses
the predicate $ path (Source, Destination, Path\_list, Cost) $.
For this program, it is better to explore paths of lesser cost first.
This can be achieved by using $ path $ facts of lesser cost in preference to
$ path $ facts of greater cost.
%%%%%%%%% DELETEd
% Sloppy delta iteration \cite{gkb87:sloppy} can be used for this purpose ---
%%%%%%%%%%%%%%%%%%%%%%
$ path $ facts of greater cost are hidden when they are derived, and each
time a fixpoint is reached, the $ path $ facts of lowest cost are exposed.
This continues until there are no more hidden facts.

The user can specify that the evaluation prioritize the use of facts in this
fashion, using an annotation of the following form:
\begin{tabbing}
1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= \kill
\> @ {\sf prioritize}   $path (X, Y, P, C) ~ min (C)$. 
\end{tabbing}
This annotation is easily extended to prioritize facts for multiple predicates
at the same time.
We describe the benefits of this annotation in Section~\ref{sec:aggsel}.

{\em Default:  No prioritization is specified.}

{\em Interactions:  Cannot be used in conjunction with pipelining.}

\subsubsections{The Save Module Facility}

The module mechanism provides several important advantages.
First, by moving many rules out of a module, the number of rules that 
are involved when performing an iteration on a module is reduced;
this is particularly useful when computation in the higher module can
proceed only after answers to subgoals on the lower module have been returned.
Second, predicates defined in an external module are treated just like 
base predicates by the semi-naive rewriting algorithms --- whenever there 
is a query (or set of queries) on such a predicate, a call to the module 
is made, and all the answers are evaluated.
This has the benefit that the number of semi-naive rewritten rules 
decreases considerably if more predicates can be treated as base predicates.
Third, in most cases, facts (other than answers to the 
query) computed during the 
evaluation of a module are best discarded to save space (since bottom-up
evaluation stores many facts, space is generally at a premium).
Module calls provide a convenient unit for discarding intermediate
answers.
By default, CORAL does precisely this---it discards all intermediate
facts and subgoals computed by a module at the end of a call to the 
module.

However, there are some cases where the first two benefits of modules 
are desired, but the third feature is not a benefit at all, but instead
leads to a significant amount of recomputation.
This is especially so in cases where the same subgoal in a module 
is generated in many different invocations of the module.
In such cases, the user can tell the CORAL system to maintain the state of the
module (i.e., retain generated facts) in between calls to the module, 
and thereby avoid recomputation; we call this facility the 
{\em save\_module} facility.

In the interest of efficient implementation we have the following 
restriction on the use of the save\_module feature:
{\em if a module uses the save\_module feature, it should not be 
invoked recursively.}
We do not make any guarantees about correct evaluation should this 
happen at run-time.
(Note that the predicates defined in the module can be recursive;
this does not cause recursive invocations of the module).

%%%%%%%%%%%%%
% Efficient implementation of this feature in general, when modules can have 
% parameters and can be called recursively, seems to be a very hard problem.
% Hence we have two restrictions on the use of this feature.
% (1) we require that the module not have any parameters, and
% (2) we require that the module not be called recursively.
%%%%%%%%%%%%%

{\em Default:  All facts computed by a module are discarded after
computing the answers to a query.}

{\em Interactions:  Cannot be used in conjunction with pipelining.}

\subsections{Predicate Level Control}
\label{sec:predeval}

CORAL provides a variety of per-predicate annotations.

\subsubsection{Duplicate Checks}

The {\em allow\_duplicates} and {\em multiset} annotations can also
be specified on a per-predicate basis.  Whereas the former can be
specified on any predicate in the rewritten program, the latter can
only be specified on predicates in the original program, and in
conjunction with Magic or Supplementary Magic rewriting.

{\em Default and Interactions:  See Section \ref{sec:moddup}.}

\subsubsection{Indexing Relations}
\label{sec:indexing}

An index on all arguments of the relation is automatically created by the 
system whenever a CORAL relation  
is created, and automatically updated whenever the corresponding
relation is updated (either by inserting or deleting facts).
While this index is very useful (especially for duplicate elimination), 
additional indices may be specified,
through annotations, to improve evaluation efficiency.
(Once specified, indices are automatically maintained as the relation
is updated.)

CORAL supports two forms of indices:
(1)~{\em argument form indices}, and
(2)~{\em pattern form indices}.
Both forms of indices are implemented through dynamic hashing for
main memory relations; only the first form is supported for persistent
relations, and is implemented using B+ trees.
The first form creates an index on a subset of the arguments of a relation.
The second form is more sophisticated, and allows us to retrieve 
precisely those facts that match a specified pattern that can contain 
variables.
Such indices are of great use when dealing with complex objects created
using functors.
Suppose a relation $ employee $ had two arguments, the first a name and
the second a complex term $ address(Street,City) $.
The following declaration then
creates a pattern form index that can efficiently retrieve, for instance, 
employees named ``John'', who stay in ``Madison'', without knowing
their street.
\begin{tabbing}
12\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= \kill
\> @ {\sf make\_index} $ employee (Name, address(Street, $\\
\> \> \> \> \> \> $ City)) (Name, City) $.
\end{tabbing}
Similarly, a pattern form index can be used to retrieve, for example, 
tuples in relation $ append $
that have as the first argument a list that matches $ [X|[1,2,3]] $. 
A tuple $ ([5|[1,2,3]], [4], [5,1,2,3,4]) $ would then be retrieved.

We present some examples of index specification below:
The annotation
\begin{tabbing}
1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= \kill
\> @ {\sf make\_index} $p$  ($bfb$).
\end{tabbing}
creates an index on the first and third arguments of relation $ p $.
The annotation
\begin{tabbing}
1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= \kill
\> @ {\sf make\_index} $m\_append ([H \mid T], L) (T,L)$.
\end{tabbing}
creates an index that lets us retrieve facts for $ m\_append $
such that the facts
unify with $ m\_append ([H \mid T], L) $, and $ T $ and $ L $ are
bound (on unification with the tuple) to specified values.

The Supplementary
Magic Templates rewriting stage generates annotations to create all
indices that are needed for efficient evaluation.
The user is allowed to specify additional indices,
which is particularly useful if the default optimizations are overridden.

{\em Default:  Materialized evaluation with Supplementary Magic is the default.
An analysis is carried out to determine what indices are needed and these
are automatically created.  With other rewriting methods, or with pipelining,
only the index on all arguments is created for each predicate.}

{\em Interactions:  Pattern form indices only supported on main-memory relations.
Indices can be specified on EDB, IDB and persistent relations.}

\subsubsection{The Choice Operator}

CORAL provides a version of the choice operator of LDL, but with altogether
different semantics \cite{rbss90:coral}.
The following example illustrates the use of choice in CORAL.
Consider the $ path $ predicate from Figure~\ref{fig:shortest},
and suppose that we are interested in just one path between each pair of 
nodes.  (For instance, the user may want
just one answer, or perhaps the predicate $ path $ is used in a computation
that works equally well irrespective of which path it gets, so long as it
gets at least one path between each pair of nodes $ X, Y$ 
whenever such a path exists.)
This can be specified using the following annotation:
% The following program illustrates the use of the choice annotation.
\logicprog{
\lfact{}{@{\sf choice}~~ path (X,Y,P,C) (X,Y) }\\
% \lrule{}{path(X,Y,[X,Y])}{edge(X,Y)} \\
% \lrule{}{path(X,Y,[X|P])}{edge(X,Z), path(Z,Y,P)}
}

The choice annotation says that for each value of the pair $ x,y $, at most
one fact $ path(x,y,p,c) $ need be retained for $ path $.
If more than one fact $ path(x,y,p,c) $ is generated by the program for any 
pair $ x,y $, the system arbitrarily picks one of the facts to retain,
and discards the rest.
If we wish to retain a path for each pair of nodes and each path cost,
we could use the annotation
\logicprog{
\lfact{}{@{\sf choice}~~ path (X,Y,P,C) (X,Y,C) }\\
}

Unlike in LDL, the choice made is final --- CORAL does not backtrack
and try different ways to make the choice.
This semantics can be implemented more efficiently in 
a bottom-up fixpoint evaluation than the LDL semantics.
Giannotti et al.~\cite{gpsz91:localchoice} have investigated the 
connections between this ``local'' version of choice and stable models.

Note that the use of choice in CORAL is sound, in the following sense,
for programs that do not use negation or set-grouping --- any answer 
that is produced by a program that uses choice would have been
produced by a version of the program with the choice removed.
The semantics is non-deterministic since the system makes no guarantees as
to which fact it will retain.
It is possible that because of the use of choice, a query that had an
answer may no longer have any answers.
However, we believe there are many applications where an arbitrary choice
needs to be made for efficiency, and will not affect the answer.
In addition to the example above, we find many examples of such choices
made in graph algorithms such as depth-first search etc.
Greco et al.~\cite{gzg92:greedy} illustrate the utility of local choice
in a variety of ``greedy'' algorithms.

{\em Default:  No choices are made unless explicitly specified by the user.}

{\em Interactions:  If used in conjunction with Existential Query 
Optimization or Factoring, choice must be specified on the rewritten
program since arities of the predicates could change. Choice is
not supported with pipelining in the current implementation.}

\reminder{We should discuss the following.  How is it different from grouping??

It seems like a deterministic variant of the choice operator may be 
useful.  Eg. {\sf choice} pred(X,Y,C) (X,Y) (min(C))  retains not 
any arbitrary fact but one with minimum C value.
}

\subsubsection{Aggregate Selections}
\label{sec:aggsel}

Consider the $ shortest\_path $ program from Figure~\ref{fig:shortest}.
To compute shortest paths between points, it suffices to use only the shortest
path between pairs of points, since path facts that do not correspond to
shortest paths are irrelevant.
CORAL permits the user to specify an {\em aggregate selection} of the following 
form on the predicate $ path $.
\begin{tabbing}
1\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= \kill
\> @ {\sf aggregate\_selection}  $path(X,Y,P,C) (X,Y) min(C)$.
\end{tabbing}
The system then checks (at run-time) if a path fact is such that there is
a path fact of lesser cost $ C $ with the same value for $ X, Y $
(i.e., between the same pair of points), and if there is such a fact, the costlier
path fact is discarded.
This aggregate selection is extremely important for efficiency --- without
it the program may run forever, generating cyclic paths of increasing
length.
With this aggregate selection, along with the choice annotation
$@{\sf choice}~~ path (X,Y,P,C) (X,Y,C) (P)$,
a single source query on the program runs in time $ O(E \cdot V) $, where
there are $ E $ edge facts, and $ V $ nodes in the graph.

Using facts in a prioritized fashion (described in Section~\ref{sec:order})
reduces the cost of evaluation of a single source shortest path problem 
from a worst case of $ O(E \cdot V) $  to $ O(E \cdot \log(V)) $\footnote{
Assuming that the edge costs are non-negative.} (\cite{sr91:aggr}).
This illustrates the importance of aggregate selections and 
prioritizing the use of facts in a bottom-up evaluation.
\cite{sr91:aggr} describes a technique to generate
such aggregate selections automatically, but aggregate selections could
also be specified by the user.

{\em Default:  No aggregate selections are generated, in the current implementation.}

{\em Interactions:  Cannot be used in conjunction with pipelining.}

\subsections{Rule Level Control}
\label{sec:ruleval}
 
\subsubsection {Choosing Sips}
\label{sec:sips}

Informally, a sideways information passing strategy (sips) 
tells the 
evaluation system how to process a rule given the form of a query on the 
head predicate of the rule.
The form of the query is specified by an {\em adornment string} 
which consists
of a sequence of $ b $'s and $ f $'s of length equal to the arity of the 
predicate.
An adornment $ bf $ would match a query that had the first argument
{\em bound} to some term (not necessarily a constant) and the second argument
being a {\em free} variable.
For each such query form, the user can specify an order in which the literals
in the body of the rule are evaluated --- an answer to a literal in the 
rule provides bindings for later literals in the rule, and these bindings
can be used to restrict queries on the later literal.
This may be critical for some evaluable predicates, since they may require 
some arguments to be bound in order to return a finite set of answers.
It may also be important for other predicates for reasons of correctness or
efficiency.  The sip is treated as a parameter to the Magic and Supplementary
Magic transformations, along with the program and the query.
CORAL uses a default left-to-right sips.

Hence, if no sips are specified, a query on a rule is conceptually
solved as follows. 
First, the left-most literal is queried with the bindings provided by the 
query on the head --- the answers to the
query on the left-most literal result in some bindings for other variables
in the rule. 
The next literal from the left is then queried, using the bindings provided
by the head and the first literal when creating the query.
This process continues towards the right, until all the literals are
solved and answers for the head are created.
Note that this description is conceptual --- the actual 
evaluation may do things in a different sequence; however, given a
query on the head, the subqueries 
generated on the body literals are as described above.

{\em Default:  Left-to-right sips.  This cannot be overridden for pipelining.}

{\em Interactions:  Used only in conjunction with Magic or Supplementary Magic
rewriting.}

\subsubsection{Join Orders}
\label{sec:joinorder}

The choice of join order is related to but distinct from the choice of 
sip order.
CORAL uses a default left-to-right join order in the absence of information
about relation sizes, except that for semi-naive rewritten rules 
the ``delta'' predicate is moved to the head of the join order.
The user can change this default on a per-rule or on a 
per-semi-naive-rewritten rule basis, in a fashion similar to the sip-order.

{\em Default:  Left-to-right join order, with delta predicates moved to the
front of (semi-naive) rules.}

{\em Interactions:  Is inapplicable in conjunction with pipelining.}

\subsubsection{The Update Operator}
\label{sec:updates}


We allow a limited form of update in rule heads, which is illustrated
by the following program.
\logicprog{
\lfact{R1:}{path(X,Y,\infty)}\\
\lrulecont{R2:}{path(X,Y,\infty \rightarrow C)}{edge(X,Y,C),}\\
\lheader{\hspace*{.75cm} $path(X,Y,\infty).$}\\
% \lrule{R2:}{path(X,Y, \infty \rightarrow C)}{edge(X,Y,C)}\\
\lrulecont{R3:}{path(X,Y,C \rightarrow C1+C2)}
	{edge(X,Z,C1),}\\
\lheader{\hspace*{.75cm} $ path(Z,Y,C2), path(X,Y,C), C1+C2 < C.$}\\
\lquery{path(a,b,C)}
}
In the above program we have used $ \infty $ to represent some value that is
larger than the maximum length of acyclic paths in the $edge$ relation.
Note the difference in the structure of the head of rule $ R3 $.
The notation $ C \rightarrow C1+C2$ 
says that on successfully instantiating the rule, any fact that matches
$ path(X,Y,C) $ should be replaced by a fact 
$path(X,Y,C1+C2)$.
Under the semantics described above, an evaluation of this program 
stores only one fact $ path(x,y,c) $ for each pair $ x,y $,
and computes shortest paths in the $ edge $ graph.

An alternate way of understanding this rule would be as follows.
{\setrulebodywidth{3.75in}
\logicprog{
\lrulecont {}{path(X,Y,C1+C2), {\sf delete}~path(X,Y,C)} {}\\
\lheader{\hspace*{.75cm} $edge(X,Z,C1), path(Z,Y,C2),  $}\\
\lheader{\hspace*{.75cm} $path(X,Y,C), C1+C2 < C. $}
}}
Each successful rule instantiation deletes any facts
that match the (instantiated version of) $path(X,Y,C)$,
and inserts (the instantiated version of) $path(X,Y,C1+C2)$.
In effect, here, the third field of the $ path $ fact is updated ``in place.''

This program sums the elements of a list.
\logicprog{
\lfact{R1:}{list\_sum(L,L,0)}\\
\lrule{R2:}{list\_sum(L,[X|L1] \rightarrow L1, N1 \rightarrow N1 + X)}
	{list\_sum(L, [X|L1], N1)}\\
\lrule{R3:} {sum(L,N)} {list\_sum(L,[],N)}\\
\lquery{sum([1,2,3,4],N)}
}
Note the rather strange structure of the head of rule $ R2 $.
The notation $ [X|L1] \rightarrow L1$ and $ N1 \rightarrow N1 + X $ 
says that on successfully instantiating the rule, any fact that matches
$list\_sum(L,[X|L1],N1) $ should be replaced by a fact 
$list\_sum(L,L1,N1+X)$.
An alternate way of understanding this rule would be as follows.
\hornclause{}{list\_sum(L,L1,N1+X), {\sf delete}~~list\_sum(L,[X|L1],N1)}
 		{list\_sum(L, [X|L1], N1)}
Each successful rule instantiation deletes any facts
that match the (instantiated version of) $list\_sum(L,[X|L1],N1)$,
and inserts (the instantiated version of) $ list\_sum(L,L1,N1+X) $.
 
Under the semantics described above, an evaluation of this program 
stores only one $ list\_sum $ fact for each list $ L $,
and computes the sum of the numbers in the given list in the query.
As each new fact is created, the second and third fields of the
old fact are updated ``in place.''

The use of update is sound, in the following sense, for 
programs that do not use negation or set-grouping --- 
any fact computed by a program using the update operation will also be 
computed by the version of the program with the update removed.
Our support for updates in declarative modules is much more limited than
that in LDL.
More general use of updates requires the use of the imperative module
or command modules.

Although the semantics of using this operation is in general
non-deterministic,
there are several classes of programs for which the semantics 
is deterministic.
For example, if (1) the deleted fact was used only in the body of the 
rule, (2) there is only one instantiation of the body of the rule that
can use this fact, and (3) the deleted fact is derived in only one way
(i.e., will not be derived again), it is always safe to delete the fact.
For a detailed discussion of when facts can be discarded during a 
bottom-up evaluation, see \cite{ssrn91:space}.
The update operation can thus also be viewed as an annotation of the program
to say when facts can be discarded.

The list example above falls into this class.  However,
the $ path $ example discussed earlier clearly does not fit into the 
above class, since it avoids computing some answers (facts that do not 
correspond to shortest path lengths might not be computed).
However, the program does have a clean semantics that is visible to the 
user, namely that it computes and stores shortest path facts.

{\em Default:  No updates are specified by default.}

{\em Interactions:  Cannot be specified in conjunction with pipelining.}

\subsection{Summary of CORAL Annotations}
\label{sec:annotations}

As discussed earlier,
the user can control the way the CORAL system evaluates a declarative
module by using any of a variety of annotations within a declarative module.
Several of these annotations are shown in Table~\ref{tab:annos}.
 
{\sf
\begin{figure}
\begin{tabular}{|l|}
\hline\\
@ allowed\_adornment $ \langle pred\_name \rangle ( \langle adorn1 \rangle, 
		\langle adorn2 \rangle, \ldots )$. \\
@ supplementary\_magic. \\
@ magic. \\
@ no\_magic. \\
@ factoring. \\
@ no\_factoring. \\
@ existential. \\
@ no\_existential. \\
@ materialize.\\
@ pipeline. \\
@ do\_ordered\_search.\\
@ prioritize $ \langle pred\_name \rangle ( \langle variables \rangle)
 		\langle order \rangle ( \langle variable \rangle) $ . \\
@ save\_module.\\
@ make\_index $ \langle pred\_name \rangle ( \langle \mbox{b-f-} 
 	string \rangle ) $. \\
@ make\_index $ \langle pred\_name \rangle 
 		( \langle argument\_pattern \rangle ) 
		( \langle bound\_vars \rangle )$. \\
@ aggregate\_selection $ \langle pred\_name \rangle ( \langle variables \rangle)
 		, (\langle grouping\_variables \rangle), 
 		\langle aggregate\_function \rangle ( \langle variable \rangle)
 		$ . \\
@ choice $ \langle pred\_name \rangle ( \langle variables \rangle)
 		(\langle non\_choice\_variables \rangle)
 		(\langle choice\_variables \rangle) $. \\
@ allow\_duplicates.\\
@ allow\_duplicates  $ \langle pred\_name \rangle $. \\
@ multiset  $ \langle pred\_name \rangle $ . \\
\hline
\end{tabular}
\caption{\label{tab:annos}Annotations Permitted in CORAL}
\end{figure}
}  %\sf
 
Most of the annotations should be self-explanatory in light of
the preceding sections.  In addition, we note the following points.
Indices can not only be created by annotations in declarative modules,
but can also be created from the imperative module via a function call,
and can also be created at run-time using the interactive front end that is 
provided with CORAL.

CORAL allows the use of the arithmetic operators $ +, -, *, /, // $ as
function symbols within the head or body literals of a rule.
These are transformed into literals in the body of the rule, and are 
reordered to a ``safe'' location, depending on the query.
This reordering is done at compile time based on expected query forms.
The user can suppress or enable this feature using annotations
{\sf @ do\_not\_reorder\_arithmetic} or {\sf @ reorder\_arithmetic}.


\sections{Imperative Modules}
\label{sec:imperative}

An imperative module is a program in an imperative language, which consists of 
C++ augmented with a layer of new types and constructs.

We introduce some features of imperative modules using the program in 
Figure~\ref{fig:imperative}, which updates the 
salary of a person depending on
the number of employees that work for the person (directly or 
indirectly).
The program gives an intuitive idea of the features we provide for
imperative modules.
 
\begin{figure}
\ordinalg{
\> void Update\_Sals (Relation *emp) \\
\> $\{$ \+ \\
\\
\> TupleType(EmpTuple,3) \\
\> \> TupleArg(1, string, ename); ~~ TupleArg(2, string, mname); \\
\> \> TupleArg(3, int, sal); \\
\> EndTupleType(EmpTuple) \\
\> TupleType(NumempsTuple,2) \\
\> \> TupleArg(1, string, mname); ~~ TupleArg(2, int, numemps); \\
\> EndTupleType(NumempsTuple) \\
\\
\> EmpTuple *emp\_tuple, new\_tuple;\\
\> NumempsTuple query, *result\_tuple, pattern;\\
\> Relation *numofemps = new IndexedRelation(2); \\
\\
\> query.set\_mname(make\_var(0)); \\
\> query.set\_numemps(make\_var(1)); \\
\> call\_coral ("numofemps", \&query, numofemps); \\
\\
\> FOR\_EACH\_TUPLE (emp\_tuple, emp)  $\{$ \\
\> \> pattern.set\_mname( emp\_tuple$\rightarrow$ename()); \\
\> \> pattern.set\_numemps( make\_var(0) );\\
\> \> int newsal = emp\_tuple$\rightarrow$sal(); \\
\> \> FOR\_EACH\_MATCHING\_TUPLE(result\_tuple, numofemps, \&pattern) \{ \\
\> \> \> newsal += 10* result\_tuple$\rightarrow$numemps(); \\
\> \> $ \} $ END\_EACH\_TUPLE(result\_tuple)\\
\> \> copy\_tuple(emp\_tuple, new\_tuple); \\
\> \> new\_tuple$\rightarrow$set\_sal(newsal); \\
\> \> update\_tuple(emp, emp\_tuple, new\_tuple); \\
\> $ \} $ END\_EACH\_TUPLE(emp\_tuple)\\
\- \\
\> $\}$
}

{
\setrulebodywidth{2.75in}
\logicprog{
\lheader{{\sf module} Employee.} \\
\lheader{{\sf export} $numofemps$ (bf).} \\
\\
\lrule{}{worksfor(E,M)}{emp(E,M,S)} \\
\lrule{}{worksfor(E,M)}{worksfor(E,E1),emp(E1,M,S)} \\
\lrule{}{numofemps(M,{\sf count}(set(<E>)))}{worksfor(E,M)} \\
\\
\lheader{\sf end module.}
}
}
\caption{Updating Employee Salaries}
\label{fig:imperative}
\end{figure}

The basic object that the C++ user needs to understand to be able to
interface with CORAL is the {\em relation}, which can be treated
as a set of tuples.
The user can create two types of relations: {\em unindexed relations\/}
and {\em indexed relations\/}.

Indices can be added to a relation by means of a procedure call.
A C++ user can also directly access a database relation (not just get a
copy of it) by providing the name of the relation and its arity.
CORAL provides facilities to insert tuples into, and delete tuples from
relations using the ``$ += $'' and ``$ -= $'' operators.
A procedure {\sf update\_tuple} to update tuples in a relation is
also provided, as illustrated in the example.

CORAL provides two iterative constructs for accessing the tuples of a
relation (the tuples are returned in an arbitrary order).
{\sf FOR\_EACH\_TUPLE} successively instantiates its first argument to
each tuple in the relation given by the second argument.
{\sf FOR\_EACH\_MATCHING\_TUPLE} successively instantiates its first argument
to each tuple in the relation given by the second argument
that matches the pattern specified by the third argument.
A variety of other functions are available to the imperative language programmer
to manipulate relations.
These include all the set and aggregate functions described earlier.

A C++ user can invoke a query on a relation that is defined declaratively
(and exported by a declarative module), using a procedure
{\sf call\_coral}.
%%% illustrated in the example.
There are two variants of this procedure, one of which takes a single
query, and the other a set of queries.
In later versions of CORAL we plan to allow
inline declaration of a declarative module within imperative code.
This will provide a simpler syntax for calling declarative modules
and could be interpreted as a direct extension of the C++ language.
CORAL provides a simple convention for defining predicates using
C++ code.
We also provide a facility to define subclasses of ${\sf tuple} $ with
typed and named attributes. 
Methods corresponding to the attribute names are created for the subclass,
and these help in seamlessly converting types between C++ primitive types and
CORAL's internal type system.

These features are described in more detail below.

\subsections{The Relation Abstraction}
 
The basic object the C++ user needs to understand to be able to
interface with CORAL is the {\em relation}, which can be treated
as a set of tuples.
We provide a library of routines to create, access and manipulate tuples 
and arguments of tuples.  
The user can create two types of relations: {\em unindexed relations\/}
and {\em indexed relations\/}.
Both are subclasses of the generic class {\em relation}, which 
has many functions defined on it.
 
Currently, indexed relations have, by default, a hash-index on the set of
{\em all} arguments of the relation.
Additional hash indices may be created (at any time during the life of 
the relation) by means of a procedure call.
We also permit the creation of persistent relations.

A C++ user can also access a database relation (not just get a 
copy of it) by providing the name of the relation and its arity.
We provide functions to write a relation on to a text file.
% In later versions of CORAL we will provide C++ primitives for facts 
% stored in a text file to be read directly into a relation.
We provide primitives to insert tuples into a relation, delete tuples 
from a relation, and to update fields of a tuple in a relation.
In all cases the indices are automatically kept up to date.
Note that only database relations, and relations created by the C++ user 
can be updated in this fashion.  
The C++ user has no direct access to derived relations defined by
a declarative CORAL module, except in posing queries over them.
 
We provide two iterative constructs for accessing the tuples of a relation
(the tuples are returned in an arbitrary order).
These are currently implemented as C++ macros:
\ordinalg{
\> \> FOR\_EACH\_TUPLE (Tuple *tuple, Relation *rel) $\{ $ \\
\> \> \ldots \\
\> \> $\}$ END\_EACH\_TUPLE(tuple) \\
\\
\> \> FOR\_EACH\_MATCHING\_TUPLE (Tuple *tuple, Relation *rel, 
        Tuple *matching\_tuple) $\{ $ \\
\> \> \ldots \\
\> \> $\}$ END\_EACH\_TUPLE(tuple) 
}
 
A variety of functions are available to the imperative language programmer
to manipulate relations.
These include all the set/aggregate functions described in 
Section~\ref{sec:set:ops}.
 
\subsections{Interface to the Declarative Language}
 
A C++ user can invoke a query on a relation that is defined declaratively
(and exported by a declarative module), using the following interface.
 
\ordinalg{
\> \> int call\_coral (char *exported\_pred\_name, Tuple *query\_arglist, 
		Relation *result); \\
\> \> int call\_coral (char *exported\_pred\_name, 
                 Relation *set\_of\_query\_arglists,
                 Relation *result);
}
 
\noindent These procedures add the answers to the result relation.
When called with an empty result relation, these procedures can be used to
initialize that relation with the answers to the query.
In the first form, a single query is provided, whereas in the second
form a set of queries is provided, as a relation.
In later versions of CORAL we plan to allow the inline declaration of 
a declarative module within imperative code.
This will also provide a simpler syntax for calling declarative modules
and could be interpreted as a direct extension of the C++ language.

\subsections{Defining Predicates Using C++ Procedures}
\label{sec:def:preds}
 
Often a user of declarative CORAL may want to define a predicate using 
C++ for reasons of efficiency, or otherwise.
For any such predicate, the user needs to specify a 
function which, given a (partially instantiated) tuple, 
returns a relation containing the complete set of tuples that match 
the given query tuple.
If each query has at most one answer, a function that returns a single tuple
may be defined instead.
In addition, the user must tell CORAL what predicate this function is
used for.
This is done by declaring an instance of a built-in class
called BuiltinRelation, with the appropriate parameters, as illustrated
in the example below:
\ordinalg{
\> BuiltinRelation dummy\_name("append", 3, append\_solver)
}
where {\sf append\_solver} is a function (as described above) that takes
a tuple (with two lists to be appended as the first two arguments, and a
variable as the third argument) and returns the tuple with the third
argument set to the result of appending the two given lists.
 
\subsubsections{Arguments and Tuples}
\label{sec:lib}

Arguments of a tuple must be subclasses of a system defined class {\sf Arg}.
There are several such subclasses, which we do not go into here.
To write C++ code that defines a predicate, or that modifies a CORAL
relation, we provide a library that allows the C++ user to manipulate 
arguments and tuples.  
This library
includes routines to examine as well as to construct arguments
and tuples.
We provide a variety of routines to convert values between the internal format 
(subclasses of {\sf Arg}) and standard C++ types such as character strings,
integers, and double precision floating point numbers.
We provide several functions to determine the type of an argument of 
a tuple.
 
We provide routines to create tuples of a specified arity, and to
define tuple types with named and typed attributes.
This is illustrated in Figure~\ref{fig:imperative}.
The declaration of a {\sf TupleType} creates a subclass of the superclass
{\sf Tuple}, with several methods defined on it.
% \footnote
% {The current syntax is used essentially to be able to implement named
% attributes using C++ macro definitions.
% Later versions of CORAL will support a cleaner syntax for supporting
% named attributes.
% }
The first line of the definition of a tuple type is of the form\\
\hspace*{1cm} {\sf TupleType($\langle$name$\rangle, \langle$arity$\rangle$)}\\
This is followed by several lines of the following form:\\
\hspace*{1cm} {\sf TupleArg($\langle$arg\_number$\rangle, 
	\langle$type$\rangle, \langle$name$\rangle$)}
\\
Each of these declarations specifies
the type and name of an argument position of the tuple.
 
As can be seen from this example, the names specified for the attributes
are used as method names of this class, and these methods can be used
to examine arguments of tuples.
Further, methods are created to assign values to the named arguments
of a tuple---these have names of the form 
{\sf set\_$\langle$attrname$\rangle$}, where
the corresponding argument is named {\sf attrname}.
 
We also provide functions to strip apart a tuple, giving the C++ 
programmer access to the components of tuples.
We can determine the arity of a tuple, and can access an argument of a tuple
by providing the position of the argument.
We also provide functions to write tuples and arguments of tuples onto 
text files.

\sections{Command Modules}
\label{sec:command}

Command modules provide support for imperative programming without
resorting to C++.
Command modules provide sequencing and iteration
as control constructs, and a set of atomic commands that includes
any command that can be
typed in at the CORAL prompt.
Command modules can be parameterized.
For example, if we wish to write a module that reads in a file
and does some processing, the name of the file can be a parameter.
Whereas --- at least, in the absence of dynamic linking --- C++ imperative
modules must be compiled with the CORAL runtime system, command modules
can be consulted from the CORAL prompt, just like declarative modules.
We illustrate some features of command modules using the following example
to sort a unary relation.

\begin{example}
If the relation $elem$ is implemented as a heap
(this can be specified by the CORAL user), the following program\footnote{
The program is intended to illustrate the functionality of command modules,
but some of the syntax is still tentative.}
would implement heap sort.
This can be used elsewhere through the notation
$HeapSort(in\_rel).sort(Sorted\_list)$.
Note that $in\_rel$ could be a set or the name of a relation.

\begin{tabbing}
12 \= 12 \= 12 \= 12 \= 12 \= 12 \= 12 \= 12 \= 12 \kill
{\sf module} HeapSort ($in\_rel$). \\
{\sf export} $sort$ (f). \\
$elem(X) ~\ifff~ in\_rel(X)$. \\
$sort ([~])$. \\
while ($elem(C)$) \{  \\
\> $sort (B \rightarrow [A|B])$ \> \> \> \> \> \>
	$\ifff~ sort(B), A = max(elem.1)$. \\
\> ${\sf delete}~elem(A)$ \> \> \> \> \> \>
	$\ifff~ sort([A|B])$. \\
\} \\
{\sf end\_module}.
\end{tabbing}
$max(elem.1)$ is used to obtain the maximum element in the
first column of the (current) $elem$ relation.
Consequently, the above program incrementally builds up sorted lists of
the largest values in the $elem$ relation.
\end{example}



\sections{Using the CORAL System}
\label{sec:coral:system:use}

The CORAL implementation, in contrast to LDL, does not do
full compilation.  Rather, there is a fixed run-time system that
essentially interprets rules, and a user program is compiled into
an optimized set of rules.  This results in fast compilation,
making CORAL suitable for interactive program development.\footnote
{However, if there is an imperative module in a program, the entire
CORAL system must be re-compiled when the imperative
module is compiled. 
This is because we do not use a dynamic linker.}

When the user initiates a CORAL session, she gets the CORAL prompt $>$.
At the CORAL prompt, the user can execute any of a number of commands.
We describe a few of these commands by showing a sample CORAL 
session below:
\begin{tabbing}
1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= 1234\= \kill
\> $>$ {\sf consult("append.P").} \\
\> $>$ {\sf ?append([1,2],[2,4],X).} \\ \\
\> \> X=[1,2,2,4]. \\
\> \> (Number of Answers = 1) 
\end{tabbing}
where the file ``append.P'' contains {\sf module append} (described
in Figure~\ref{fig:prog:dumb}).
Once the file ``append.P'' is consulted, the CORAL system:
(1) generates a method for the query form {\em append(bbf)}, and
(2) creates a file ``append-M.P'' which contains the corresponding
magic program.
The user can now query {\em append\/} with the first two arguments bound,
and the third argument free;
a query from the user needs to be preceded by a ``$?$'', as shown.
Some programs, for example transitive closure programs, need EDB
facts in addition, and a user can read data from an external file using 
the {\sf consult} command as well.

CORAL provides utilities to take a text file organized as a table,
parse it into fields and records, and convert it into a relation.
Similarly, utilities for output of relations in tabular form are 
also provided.
The user can also execute any Unix command using the {\sf shell} command
% using the command {\sf shell (unix-command)} 
from the CORAL prompt.
CORAL provides a {\sf help} facility that details the various 
commands available from the CORAL prompt.

\heading{Program Development Environment}
\label{sec:debugging}

CORAL provides some basic facilities for debugging of programs.
A {\sf trace} facility is provided that does the following:
(1) it lets the user know what rules are being evaluated, and
(2) it prints out answers and subgoals as they are generated,
to let the user know how the computation is proceeding.
It is possible to trace individual predicates rather than tracing all
predicates.

CORAL also provides some high-level profiling facilities.
The unit of profiling is the  unification operation.
Unification of two atomic terms counts as one unification,
while, for example, unification of $f(X,Y)$ and $ f(a,b) $ counts as three
unifications, one at the outer level and two at the inner level.
Profiling also lets the user know how efficient the indexing is,
by keeping counts of the number of tuples that the indexing
operation tried to unify, and the
number that actually unified and were retrieved.
In addition, other counts, such as the number of successful applications of
each rule and the number of unsuccessful attempts at using a rule, are
also maintained.
All this information put together gives users a fair idea of where
their programs are spending the most time, and helps them optimize programs
accordingly.

%%%%%%%%%%%%%
% The declarative language is extensively optimized, and debugging would
% ideally be treated in such a way that the programmer is not required
% to either know very much about the optimizations, or to think operationally;
% at the same time, she must have enough details of the execution visible
% to identify the source of an error.
% Providing a translation of operations back past the optimization steps
% presents an interesting challenge, and is an item for future development.
%%%%%%%%%%%%%

%%%%%%%%%%%
%\heading{Current Status}
%
%The first prototype version is a main-memory implementation with
%about 25,000 lines of C++ code.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsections{A Preliminary Performance Comparison of CORAL with LDL}
\label{sec:perf}

In this section we present results of a brief comparison of the performance
of CORAL with LDL.
(EKS-V1 and Glue-NAIL! are not currently distributed publicly,
and we were not able to compare their performance.)
We emphasize that these results are preliminary, since CORAL
is still in the process of being tuned, and our comparison is 
itself very limited.  The goal is to give the reader some idea of
how CORAL compares with other deductive database systems.

A main observation is that
by virtue of being partially interpreted instead of fully 
compiled, CORAL is much faster than LDL in reading and compiling
queries.  In this respect, CORAL is comparable to Prolog systems.
It is therefore very convenient for interactive program development.

%%%%%%%%%%%%% SUDARSHAN - Deleted
% We measured two aspects of the performance of CORAL and LDL.
% The first is the time taken to read in and compile/interpret a query.
% The second is the time taken to evaluate a query.
% In the first aspect, CORAL outperforms LDL considerably, by virtue of the
% fact that CORAL is partly interpreted, while LDL is compiled and spends a great
% deal of time converting queries into C programs and compiling them.
% LDL programs often took several minutes to read in
% and compile (over $3 \frac{1}{2}$ minutes of elapsed time for a 
% program with six rules and 
% six query forms on a lightly loaded system).
% All the CORAL programs that we ran took just a fraction of a second to 
% read in and compile into the internal representation.
% Thus CORAL is better suited for interactive development than LDL.
% 
% While LDL does allow the saving of a state after compilation, restoring of a 
% saved state by LDL typically takes a few seconds, which is
% more than what CORAL takes to read in and ``compile'' a program.
% The size of the saved state in LDL is of the order of several tens of 
% kilobytes even for small programs with two or three rules.
% CORAL is partly interpreted, and a program that is read in is compiled
% into a compact internal form and stored.
% This internal representation is then interpreted.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%
%Table~\ref{tbl:coral:ldl} compares the performance of the 
%LDL system with the performance of the CORAL system.
%We emphasize that this comparison is preliminary, for several
%reasons: 
%(1)~The performance evaluation itself is not exhaustive. 
%(2)~The current implementation of CORAL is designed for ease of modification,
%and for dealing with non-ground terms in their full generality.
%Further low-level optimizations (which we are investigating)
%should improve performance significantly.  
%(On the non-recursive programs CORAL is much slower than LDL.) 
%Nonetheless, we believe that the numbers offer evidence that
%CORAL outperforms LDL on many programs.
%
%The programs were run on a SUN 4 workstation with 8 MB of memory.
%All times are user cpu times (in seconds) obtained using the timing 
%system of the tcsh 
%shell, with the time to start up CORAL or restore the saved state in LDL
%subtracted from the total time.
%% The space utilization numbers represent average space utilization over
%% the course of the program execution.
%%%%%

We also compared execution times --- Unix user cpu times on a lightly loaded
Sun 4 workstation ---
on a work load that included
a simple join rule, linear recursive programs (ancestor and
same generation), non-linear programs (bi-linear ancestor),
and structure manipulation (list append).\footnote{ 
The programs and queries were chosen to preclude intelligent
backtracking, factoring and other optimizations that apply to some,
but not all, programs in order to get numbers that reflect the general
case.}
The data-sets included trees, chains, a (sparse) random graph and
lists of varying lengths.  Both LDL and CORAL allow the user some
execution choices (e.g.\ whether or not to eliminate duplicates), and
we used the best combinations for both systems.\footnote{
For CORAL, pipelined execution is over twice as fast on append,
but we give the numbers for bottom-up evaluation to provide a
meaningful comparison.}

To summarize the results of the comparison, we found that the following
observations generally held.  LDL was about three times faster than
CORAL on simple joins (10s vs.\ 30s on a join that generated 
--- but did not materialize --- an
intermediate relation of 100,000 tuples).
This is explained by the fact that LDL's
compilation strategy allows for some optimizations on a per-rule basis
that is not possible with CORAL's partial interpretation, and also by
the fact that CORAL uses more abstract representations for terms
since it has to support non-ground terms, unlike LDL.
CORAL was typically much faster than LDL on the linear recursive queries.
A selection of the numbers that we obtained illustrates this:
on same generation with a 0.1\% selection, 
5.2s vs.\ 25.3s on a chain of length 1000
and 5.2s vs.\ 28.9s on a tree; and
on right-linear ancestor with no selection, 37.4s vs.\ 265.8s
on a chain of length 160.
(The only exception that we found to this trend was on
left-linear ancestor with no selections, where CORAL
took 33.5s vs.\ 17.1s for LDL on a chain of length 160.)
While there is no fundamental reason why this should be so, we conjecture
that CORAL perhaps has better indexing. CORAL was also faster on
bilinear queries, especially with selections.  
For instance, on bi-linear ancestor with a 1\% selection, CORAL
took 1.2s vs.\ 39.8s for LDL on a tree.
The reason here is
that LDL only implements a version of magic sets that deals with linear
recursive queries.  This is confirmed by the fact that LDL cannot
run the takeuchi program (a standard Prolog benchmark),
where bindings must be propagated through
recursive literals to avoid unsafe calls on arithmetic predicates.
Finally, CORAL is linear on append, whereas LDL is quadratic.  
For example, CORAL execution time went from 0.6s to 2.4s when
we quadrupled the list length, while LDL went from 4s to 56s.
Again,
we conjecture that the difference is due to indexing, 
and for this example in particular, the treatment of structured terms.

We also ran these queries on a Prolog system,
CLP($\cal R$) Version 1.1 from IBM.  
As expected, CORAL was much faster on all but the
append query, on which CLP($\cal R$) was much faster.
On the append program, the overhead of memoing facts
was wasted; on the other programs, with the exception of the join, 
it saved much repeated computation.
(Incidentally, Prolog will not terminate on the left-linear and
bi-linear versions of ancestor.)
We note that there are Prolog systems such as BIM,
Quintus and Sicstus Prolog that are much faster than CLP($\cal R$);
we used CLP($\cal R$) since the others are not available to us currently.

Finally, we note that Prolog-style execution can be obtained
in CORAL by using the pipelined mode of evaluation.
Our implementation of pipelining is not as sophisticated
as current Prolog implementations; execution is typically
slower than CLP($\cal R$), but by a factor of less than 10.
However, on many programs, e.g.\ append, it is faster than
standard bottom-up evaluation with magic sets rewriting.

%%%%%%
%The data sets used are straightforward --- the Chain160 (resp.\ Chain1000)
%dataset represents a chain of edges of length 160 (resp.\ 1000).
%DownTree1110 represents a downward pointing tree with branching factor 10,
%and depth 4 (for a total of 1110 facts).
%The programs used for comparison, other than Nonrec1 and Nonrec2,
%are well known and we do not repeat them here.
%Nonrec1 and Nonrec2 are single line programs that compute the join 
%of the parent relation with itself on the first column.
%The only difference between them is that Nonrec1 does not
%perform duplicate elimination while Nonrec2 does.
%We note that the time taken to print out results is included in the 
%timing numbers, and for some of the ancestor programs and for the Nonrec
%programs this could be a significant portion of the total time.
%
% \begin{table}
% \begin{center}
% \begin{tabular}{|l|l|l|r|r|} \hline
% Program	&	Data Set & Query  & Coral Time (secs) & LDL Time (secs) \\
% \hline
% Same generation & DownTree1110 & ?sg(1,X) & 10.26   & 28.89  \\
% Same generation & Chain160	 & ?sg(1,X) & 1.93    &  0.74  \\
% Same generation & Chain1000	 & ?sg(1,X) &13.07    & 25.30  \\
% % Same generation & Chain1000	 & ?sg(X,Y) & 5.68    & 13.46 \\
% \hline
% \comment{For all sg examples, LDL - duplicates=yes, cycles=yes.
% For CORAL duplicate checking turned on, index_deltas off.}
% %%%%%%%%%%%%%%%%%%%%
% Left linear ancestor &  Chain160 & ?anc(X,Y)& 38.56 & 17.14  \\
% Left linear ancestor & DownTree1110 & ?anc(X,Y) & 10.25 & 10.46 \\
% Right linear ancestor & Chain160 & ?anc(X,Y)   & 37.39 & 265.80 \\
% Right linear ancestor & DownTree1110 & ?anc(X,Y) & 11.29 & 19.26 \\
% % Bilinear ancestor & DownTree1110 & ?anc(1,X) & 29.79  & 73.40  \\
% Bilinear ancestor & DownTree1110 & ?anc(11,X) & 1.06  & 39.75  \\
% Bilinear ancestor & DownTree1110 & ?anc(X,Y) & 19.13  & 39.98  \\
% \comment{For all ancestor examples, for LDL duplicates=no, cycles=yes.  
% For CORAL, no subsumption checking on ancestor0 or ancestor2}
% %%%%%%%%%%%%%%%%%%%%
% \hline
% Append &	& Lists of length 100	& 0.73 	& 3.94 	\\
% Append &	& Lists of length 200   & 1.54 	& 14.67 	\\
% Append &	& Lists of length 400   & 3.34 	& 56.35 	\\
% \hline
% Nonrec1 & DownTree1110 & & 29.88  &  5.14   \\
% Nonrec2 & DownTree1110 & & 41.80 &  9.16  \\ \hline
% \end{tabular}
% \end{center}
% \caption{Performance Comparison of CORAL and LDL}
% \label{tbl:coral:ldl}
% \end{table}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



%%%%%%%
%The selection queries on the same generation program each had one answer,
%with a small amount of duplicates being generated.
%The selection query on the bilinear ancestor program had $10\%$ 
%selectivity compared to computing the complete transitive closure.
%The timing numbers for the same generation program and the various
%versions of the ancestor program indicate that CORAL is often faster than
%LDL for recursive programs, although there are some recursive 
%programs for which LDL beats CORAL.
%%%%%%%%%%

\sections{Extensibility in CORAL}
\label{sec:extensibility}

The implementation of the declarative language of CORAL is designed to 
be extensible, i.e., the user can add new types to the system, and can add
new implementations of relations and indices, 
without modifying or recompiling the rest of the system code.
The user's program will, of course, have to be compiled and linked with 
the system code.

\subsections{Adding Abstract Types to CORAL}
\label{sec:abstypes}

%%%%%%%%%
% if we unify an object of the user-defined type with another 
% object of the same type, either the objects must be equal or they must not
% unify --- the result must not be a new object (i.e., an ``instance'' of the 
% two objects that are unified).
%%%%%%%%%

To create a new type\footnote{
Objects of the user-defined type must be ``constants'', i.e., 
they cannot contain variables within them.
New types that are not constants can be supported with minimal change 
to the system code.
Later versions of CORAL may provide more direct support for such types,
without the need to modify system code.}, the 
user must declare it as a subclass of the system
class {\sf ConstArg}.
Several virtual functions (methods) must be defined for the new type.
These include: an operator `$ == $' (which takes an object of type {\sf Arg} 
as parameter), {\sf printon} (which takes a file as a parameter),
{\sf hash} which returns a hash value, {\sf copy} which creates a copy of the
object, and {\sf delete} which is called when the system no longer needs the
object.

The following example illustrates how a new type {\sf bitmap}
may be added to CORAL.

\algorithm{
\> class bitmap: public ConstArg \{ \\
\> \>    $ \ldots $ data structures needed to represent bitmaps.\\
\> \}\\
\\
\> int bitmap::operator ==(Arg *arg) \{ \\
\> \> $ \ldots $ code to implement this function. \\
\> \} \\
\> $ \ldots $ some other functions (printon, hash, copy, delete) \\
\> need to be defined.
}
Once we define a new type, say {\sf bitmap}, we can create tuples
with arguments that are bitmaps --- employee records can now store 
photos of employees represented as a bitmap.
The function {\sf printon} controls how bitmaps are interpreted when they
are printed.
It can, for instance, create a window to display the bitmap.

Since the user implements the `$ == $' operator, it is possible to create
a new representation for an existing type (whether system defined or 
user-defined), and write the `$ == $' operator in such a way as 
to work correctly across representations.
For instance, in the next version of the CORAL implementation, 
strings will have two possible internal representations, depending on whether
the relations are memory resident or disk resident.
Memory resident strings will be entered in a symbol table, while
disk resident strings will not;  however equality checks between strings 
represented in the two formats will not be affected, since `$ == $' will
be redefined for disk-resident strings.

The user would be well advised to make the `$ == $' operator efficient,
especially for types that allow large objects.
For instance, a user who defines a type similar to terms built out of 
function symbols would be well advised to implement some form of hash-consing
 \cite{goto74:hashcons,sg76:hashcons}
on objects of the type, and maintain the hash-cons value along with the 
object for quick equality checks.
This optimization is entirely transparent to the CORAL system.

CORAL does not provide syntactic support for objects of new types within
the declarative module.
(However, it is possible for the user to define built-in predicates that 
construct or retrieve subparts of objects of the new type.
Thus, if a user-defined type is built out of standard types, objects of the
type can be created entirely within the declarative language, and if
required, can be converted back to standard types.)
For example, a constructor for the data type {\em sequence} may take as 
parameter a list of elements, and convert it into whatever internal format is 
used for sequences.
Clearly, bitmaps cannot be efficiently constructed thus, and must be created
by imperative code written by the user.
They can then be stored in relations, and manipulated just like other
CORAL types.

Once objects of a user-defined type are created, presumably the user
will want to manipulate them using rules.
Builtin predicates on the user-defined type will probably be critical
for this stage.
CORAL provides the user with a very simple way of creating such built-in
predicates ---
this was described in Section~\ref{sec:def:preds} ---
and hence this stage should not be a bottleneck in developing
applications that use user-defined types.

The user has control over both the {\sf copy} routine and the 
{\sf delete} routine.
The system never modifies a constant object, so 
the {\sf copy} function can merely return a pointer to the 
old copy of the object, so long as the {\sf delete} function is written 
keeping this in mind (perhaps using a reference count scheme).
In other words, if {\sf copy} returns a pointer to
an object, {\sf delete} may be called several times on the same object,
and some scheme such as reference counting (or merely not doing any actual
deletion) must be used.
Such sharing is important for a type such as a bitmap that could
use a lot of space.
The hash value provided by the user for user-defined types is used 
for creating hash-indices on relations.

\subsections{Adding New Relation and Index Implementations}

CORAL currently supports relations organized as linked lists,
relations organized as hash tables,
relations defined by rules, and relations defined by C++ functions.
The interface code to relations makes no assumptions about the structure of
relations, and is designed to make the task
of adding new relation implementations easy.

Tuples in a relation can be accessed using the
$ get\_next\_tuple (TupleIterator) $ member function of the type $ Relation $.
This function takes as a parameter a $ TupleIterator $ structure that 
contains a pattern, and each call to this function returns a tuple (from 
the relation) that matches the pattern.
The code that searches the relation can save its state in a field of the
$ TupleIterator $ in between calls to $ get\_next\_tuple $.
Tuples can be inserted into relations using $ insert (Tuple) $,
and tuples can be deleted from relations using
$ delete (Tuple) $, each of which is a member function of the type $Relation$.
New implementations of relations can be created by making the
implementation a subclass of $ Relation $.
The functions $ insert, delete $ and $ get\_next\_tuple $ are 
virtual functions, and can be redefined for the user-defined 
implementation of the relation.

Similarly, the user can create index structures
as subclasses of type $ Index $, along with 
$ insert (Tuple), \lbok get\_next\_tuple (TupleIterator)$ and
$delete(Tuple) $ function definitions.
The user can store the indices in an $ IndexSet $
field of the relation, and can use the indices to make the 
$ get\_next\_tuple $ function on relations efficient.
It is relatively straightforward to add, for instance, a B-tree index
in this fashion.

%%%%%%%%%%
% Currently CORAL does not perform join reordering optimizations, and does
% not provide support for the user to specify the cost of indexing for use
% in such optimizations.
%%%%%%%%%%

%%%%%%%%%%%%%%%%%
% This is the only kind of index supported currently by CORAL.
% We are considering implementing other forms of indices such as 
% B-Trees in later versions of CORAL, and may allow the user to 
% provide a comparison operator `$ <= $'.
% If not defined, this operator will default to an operator that says 
% everything is equal, and thus will not affect the correctness of indexing.
% Non-hash indexing may be slowed down by this default if it is used,
% but presumably such indexing will only be used with data types that 
% provide the operator.
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \section{User-Level Control Over Execution Strategy}
% 
% The user can choose from the range of evaluation options supported
% by CORAL through the use of annotations in each module.
% In addition, two constructs are provided for modifying the semantics
% of rules in declarative modules; {\em choice} and a limited form
% of {\em head updates}.  Some of the annotations, as well as the two
% special constructs, can result in an evaluation that does not
% conform to the logical reading of the clauses, and should therefore
% be used with care.  These facilities are described below.
% First, however, we present the interface for modules since this is
% central to how different executions in different modules are combined
% cleanly.
%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Higher Order Facilities in CORAL}
\label{sec:ho}
 
CORAL provides two ways to define generic predicates.
The first involves dynamic binding of predicate names, and we describe 
this in detail.
The second form is purely syntactic in that there is a well-defined
compile time conversion to a rewritten program.
However it offers significant programming benefits.
 
The first approach relies on two features of CORAL.
First, CORAL allows predicates to take other predicates as parameters.
Note that this is different from having sets/relations as parameters
in that the actual extent of the relation is not stored --- rather a
reference to the actual relation (for EDB predicates),
or rules defining the predicate (for derived predicates),
is used as the parameter.
Second, CORAL provides a {\sf call} built-in meta-predicate 
(which takes an arbitrary number of arguments), and can be used to 
invoke the predicate passed as the first argument.

The following program illustrates the use of the {\sf call}
meta-predicate.
 
\logicprog{
\lrule{}{anc(Edge, X,Y)} {call(Edge,X,Y)}\\
\lrule{}{anc(Edge, X,Y)} {call(Edge,X,Z), anc(Edge,Z,Y)}\\
\lquery{anc(tree\_edge,X,Y)}
}

This program defines a generic transitive closure, and the query shown
uses a predicate $ tree\_edge $ whose transitive closure is to be 
computed.
The predicate $ tree\_edge $ must be either an {\em exported} predicate
or an EDB relation.
 
The semantics of {\sf call} is that whenever the {\sf call} literal is 
encountered in the body of a rule, a query is set up on its first argument,
which is fully evaluated before the evaluation of the rule continues.

The use of the {\sf call} predicate thus has two important restrictions:
\begin{enumerate}
\item The value of its first parameter {\em must} be bound at the time
of call to an {\em exported} predicate (either an EDB predicate, or
one explicitly exported by some module).
\item The chain of queries through the call predicate must be acyclic,
else the computation may not terminate.
\end{enumerate}
For efficient evaluation it is recommended that call never be used to 
set up queries on predicates defined in the same module or in 
other modules that depend on the module which initiates the call.\footnote
{ Evaluation is still guaranteed to be correct in such a case, but there 
may be a significant amount of recomputation involved.
}
 
% %%%%%
% % (the amount of
% % recomputation is unlikely to be much worse than the amount of 
% % recomputation performed by Prolog and may be much less).
% %%%%%
 
CORAL also provides a syntactic higher-order feature that is useful 
in several contexts.
It differs from the runtime call mechanism described earlier ---
it can be viewed as a form of a module definition with parameters
that is macro expanded at compile time to generate a set of 
rules that use normal syntax.
This can be used, for example, to create a generic set of rules for 
the ancestor program in a more direct way without the {\sf call} feature.
When invoked with a given $ edge $ predicate, a new set of rules is
generated that performs the ancestor computation for the given $ edge $
predicate.
While module invocation overheads are avoided,
this may result in a large number of rules 
being generated, and must therefore be used with care.

\subsubsection {Macro Definitions}
\label{sec:macro}

{
\setrulebodywidth{4.0in}
\logicprogfig{\label{fig:matmult}Matrix Multiplication}{
\lheader{{\sf macro} $ matrixmult (a, b, c) $.} \\
\lheader{{\sf local} $ temp, tempsum $.} \\
\lcomment{these predicates are local to the macro; unique names need to
be generated on expansion} \\
\\
\lrule{}{temp(I,J,K,Val)}{dim (a,Ra,Ca), dim (b, Ca, Cb), K {\sf~in~} 
\{1..Ca\}, \lbok a(I,K,Va), \lbok b(K,J,Vb),\lbok  Val = Va * Vb}  \\
\lfact{}{tempsum (I,J,0,0)} \\
\lrule{}{tempsum (I,J,K,Val)}{K>0, tempsum (I,J,K-1,Val1), temp (I,J,K,Val2),
\lbok Val=Val1+Val2} \\
\lrule{}{c (I,J,Vc)}{dim (a,\_,Ca), tempsum (I,J,Ca,Vc)} \\
\lrule{}{dim (c,Ra,Cb)}{dim(a,Ra,Ca), dim(b,Ca,Cb)} \\
\lcomment{the first attribute of $ dim $ is predicate-valued} \\
\\
\lheader{\sf end macro.} \\
\\
\lheader{{\sf module} $ Main $.} \\
\\
\lrule{}{a1 (1,J,Va)}{J \geq 1, dim (a1,Ra1,Ca1), J \leq Ca1, b(J,Va)} \\
\lrule{}{a1 (I,J,Va)}{dim (a1,Ra1,Ca1), dim (c1, Rc1, Ca1), 
Rc1 \geq Ra1-1, c1 (I-1,J,Va)} \\
\lheader{$ matrixmult (a1,a2,c1) $.} \\
\lheader{$ matrixmult (b1,b2,c2) $.} \\
\lcomment{the $ matrixmult $ macro is expanded inline at these 
two occurrences} \\
\\
\lheader{\sf end module.} 
}
}
 
We allow the use of macro definitions as illustrated in 
Figure~\ref{fig:matmult}.
\reminder{Check this: The features described in this 
figure have not been implemented as yet.
Users can get some of the power of this feature using the standard
C preprocessor, as we describe later.}
Macros are expanded inline at their point of occurrence (with
name substitutions depending on the arguments).
 
Consider representing a matrix $ A_{i,j} $ as a predicate
$ a (i,j, A_{i,j}) $.
Let the two matrices $ A1 $ and $ C1 $ be defined in terms of each
other, using mutually recursive predicates $a1 $ and $ c1 $,
as shown in Figure~\ref{fig:matmult}.
The first row of $ c1 $ can be computed using the first row of $ a1 $ (and
the first column of $ a2 $).
Since the values of the elements in the $ i^{th} $ row of the matrix $ a1 $ 
depend on the values of the elements in the previous row of $ c1 $,
$ a1 $ and $ c1 $ are mutually recursive; hence,
$ matrixmult $ cannot be defined as a module that takes (via either
input parameters or imported modules) $ a $ and $ b $ and returns $ c $
(via an exported predicate), due to the acyclicity requirements imposed 
on modules.
Macros thus provide a convenient mechanism for avoiding repetitive 
programming (the alternative would have been to repeat essentially the 
same code for defining both $ c1 $ and $ c2 $ in $ Main $).
 
Predicate names that occur in the {\sf local} declaration ($ temp $ and
$ tempsum $ in this case)
are treated as ``local'' predicate names and unique names are 
generated for these predicates at expansion time, to prevent
name clashes between different expansions of the same macro definition.
 
The features described earlier in this section have not been implemented as
yet.
However, most of the features can be simulated using macros defined with
the standard C preprocessor.
The following code illustrates how to do this.
 
\begin{tabbing}
$ \#define~~multiply(mata,matb,matc) \backslash 	$ \\
$~~~~ matc/**/\_temp(R,C,0,0). \backslash $ \\
$~~~~ 	\backslash $ \\
$~~~~ matc/**/\_temp(R, C, I, V) :- \backslash $ \\
$~~~~    mata( R, I, Va), I >= 1, J=I-1, matc/**/\_temp(R,C,J,V1), 
\backslash $ \\
$~~~~    matb(I, C, Vb), V0 = Va*Vb, V = V0+V1 .\backslash $ \\
$~~~~ 	\backslash $ \\
$~~~~ matc(R,C,V) :- mata/**/\_dim(A,B), matb/**/\_dim(B,E), 
	matc/**/\_temp(R,C,B,V).\backslash $ \\
$~~~~ matc/**/\_dim(A,C) :- mata/**/\_dim(A,B), matb/**/\_dim(B,C) $
\end{tabbing}
 
To define matrix $ c $ as the result of multiplying matrices $ a $ and $ b $ 
we must first define facts $ a\_dim(n,m) $ and $ b\_dim(m,p) $ (for
appropriate values of $ n,m,p $).
Then we add a line $ multiply(a,b,c) $ in order to get the rules defining 
matrix multiplication expanded in line.
 
The syntax for defining macros is:
\begin{quote}
{\sf macro} {\tt <macro-name> (<parameter-list>) <macro-definitions>}
\end{quote}
The macro definition could contain, for instance, rules with (head or
body) occurrences of predicate names from {\tt <parameter-list>} (beside
other predicate names).
Predicate names that occur in the macro but not in the 
{\tt <parameter-list>} are treated as ``local'' predicate names and
care must be taken to ensure that local predicates are given distinct
names across different macro expansions.

\section{Related Systems}
\label{sec:relatedwork}

There are many similarities between CORAL and deductive database systems
such as Aditi (\cite{vrkss:aditi}), EKS-V1 (\cite{vbkl:eksv1:short}),
LDL (\cite{nt:ldl:bk,ldl:proto}),
Glue-NAIL! (\cite{muv:des,pdr91:glue}) and Starburst SQL 
(\cite{mpr90:aggregates}).
However, there are several important differences, and
CORAL extends all the above systems in the following ways:

\begin{enumerate}
\item CORAL supports a larger class of programs, including
programs with non-ground facts and non-stratified negation
and set-generation.
\item CORAL supports a wide range of evaluation techniques, 
and gives the user considerable control over the choice
of techniques.  
The emphasis has been on providing a range of alternatives
rather than on automatically choosing an alternative.
\item CORAL is extensible --- new data and relation types and index 
implementations can be added without modifying the rest of the system.
\end{enumerate}

With respect to EKS-V1, we note that it is the only
system that supports integrity constraint checking.  It
also supports hypothetical reasoning.  Aditi is unique in giving primary
importance to disk-resident data.  

LDL++, a successor to LDL under development at MCC Austin, is reportedly
also moving in the direction taken by CORAL in many respects.  It will
be partially interpreted, support abstract data types, and use a
local semantics for choice (Carlo Zaniolo, personal communication).

In comparison to logic programming systems, such as various implementations
of Prolog, CORAL provides better indexing facilities and support for
persistent data.  Most importantly, the declarative intended model
semantics is supported (for all positive Horn clause programs, and
a large class of programs with negation and aggregation as well).

Modules serve as the units of compilation, and several evaluation
choices can be specified on a per-module basis.
Unlike Glue-NAIL! and LDL, where modules have only a compile-time meaning and 
no run-time meaning, modules in CORAL have important run-time semantics.
Several run-time optimizations are done at the module level.
For instance,
modules provide a very useful unit for discarding intermediate facts---this
is important with bottom-up computation, since facts that are computed are
generally not discarded anywhere else, and would use excessive amounts of 
memory.  
Modules with run-time semantics are also available in several
production rule systems (for example, RDL1~\cite{kms:rdl1}).

\sections{Future Directions}
\label{sec:future}

A number of issues require further work.  These include
support for metaprogramming, constraints, disk-resident data,
new data types and operations, user interfaces,
inheritance and object orientation.  

\sections{Acknowledgements}

We would like to acknowledge our debt to LDL, NAIL!, SQL, Starburst,
and various implementations of Prolog from which we have borrowed
numerous ideas.
We would like also to acknowledge the contributions of Per Bothner, 
who played a principal role in the implementation of the first prototype
of CORAL, and Praveen Seshadri, who contributed significantly to the
implementation of CORAL.



\section{Imperative Modules}

An imperative module is a program in an imperative language (currently,
C++) augmented by adding a layer of new types and constructs.
We introduce some features of imperative modules using the program in 
Figure~\ref{fig:imperative}, which updates the salary of a person depending on
the number of employees that work for the person (directly or indirectly).
\footnote{The syntax used for describing the program is tentative.}

The program gives an intuitive idea of the features we provide for
imperative modules.
These features are described in detail below.
The syntax described below is just a first cut; we are working on
improving it.

\begin{figure}
\ordinalg{
\> void Update\_Sals (Relation *emp) \\
\> $\{$ \+ \\
%%%%%%%%%
% \> TupleType(EmpTuple)
% \> \> TupleArg(1, String, ename)
% \> \> TupleArg(2, String, mname)
% \> \> TupleArg(3, Integer, sal)
% \> EndTupleType
%%%%%%%%%
\> Relation *numofemps = new IndexedRelation(2); \\
\> Tuple *query = new Tuple (make\_arglist2 (make\_var(0), make\_var(1))); \\
\> call\_coral ("numofemps", query, numofemps); \\
\> int n; \\
\> Tuple *emp\_tuple, *result\_tuple; \\
\\
\> FOR\_EACH\_TUPLE (emp\_tuple, emp)  $\{$ \\
\> \> Tuple pattern (make\_arglist2(emp\_tuple-$>$arg(1), make\_var(0))); \\
\> \> \> /* Future versions of CORAL will allow references to attributes by name */ \\
\> \> int newsal = make\_int(emp\_tuple-$>$arg(2)); \\
\> \> FOR\_EACH\_MATCHING\_TUPLE(result\_tuple, numofemps, pattern) \{ \\
\> \> \> newsal += (int) (10* make\_int(result\_tuple-$>$arg(1)));\\
\> \> $ \} $ END\_EACH\_TUPLE\\
\> \> update\_tuple(emp\_tuple,2,make\_arg(newsal));\\
\> $ \} $ END\_EACH\_TUPLE\\
\- \\
\> $\}$
}

{
\setrulebodywidth{2.75in}
\logicprog{
\lheader{{\bf module} Employee.} \\
\lheader{{\bf export} $numofemps$ (bf).} \\
\\
\lrule{}{worksfor(E,M)}{emp(E,M,S)} \\
\lrule{}{worksfor(E,M)}{worksfor(E,E1),emp(E1,M,S)} \\
\lrule{}{numofemps(M,count(set(<E>)))}{worksfor(E,M)} \\
\\
\lheader{\bf end module.}
}
}
\caption{Updating Employee Salaries}
\label{fig:imperative}
\end{figure}

\subsection{The Relation Abstraction}

The basic object the C++ user needs to understand to be able to
interface with CORAL is the {\em relation}, which can be treated
as a set of tuples.%
\footnote{We provide a library of routines to create, access and
manipulate tuples and arguments of tuples.  This library is
described in Section~\ref{sec:lib}.}
The user can create two types of relations: {\em unindexed relations\/}
and {\em indexed relations\/} using any of the following C++ declarations:

\ordinalg{
\> \> UnindexedRelation rel\_name (arity); \\
\> \> IndexedRelation rel\_name (arity); \\
\> \> UnindexedRelation *rel\_name\_ptr = new UnindexedRelation (arity); \\
\> \> IndexedRelation *rel\_name\_ptr = new IndexedRelation (arity);
}

Unindexed relations are organized as a linked list of tuples,
and indexed relations are organized as a hash table, with the hashing
done on {\em all\/} arguments of the relation.
Additional indices can be created on hashed relations,
but not on linked relations (index creation calls on such
relations are simply ignored).
A user may wish to create unindexed relations for reasons of efficiency:
if every access of the relation involves scanning all tuples,
and no index-based retrievals are done, the user may wish to avoid
the overheads of indexing.
Each of the two relation types can be viewed as a subtype of 
{\em generic relation}.
Hence, the following declarations are also permissible:

\ordinalg{
\> \> Relation *rel\_name\_ptr = new UnindexedRelation (arity); \\
\> \> Relation *rel\_name\_ptr = new IndexedRelation (arity);
}

The user cannot, however, create a relation which is not either
an unindexed relation or an indexed relation, i.e., the following 
declaration is not allowed:

\ordinalg{
\> \> Relation rel\_name (arity);
}

\noindent The above declarations create non-persistent relations.
To create a persistent relation, the user can use the following:

\ordinalg{
\> \> IndexedRelation *create\_db\_relation (char *db\_rel\_name, int arity);
}
\noindent By default, database relations are indexed relations.%
\footnote{Persistent relations are not supported currently.}

\subsection{Initializing Relations}

The above declarations create (new) empty relations, by default.
The user can also initialize a relation with the answers to a 
query on an exported predicate from a CORAL declarative module.
Either of the following functions can be called:

\ordinalg{
\> \> int call\_coral (char *exported\_pred\_name, Tuple *query\_arglist, 
		Relation *result); \\
\> \> int call\_coral (char *exported\_pred\_name, 
                Relation *set\_of\_query\_arglists,
                Relation *result);
}
\noindent These procedures add the answers to the result relation.
When called with an empty relation, they can therefore be used to initialize it.
In the first form, a single query is provided, whereas in the second
form a set of queries is provided, as a relation.
In later versions of CORAL we plan to allow the inline declaration of 
a declarative module within imperative code.
This will also provide a simpler syntax for calling declarative modules
and could be interpreted as a direct extension of the C++ language.

\noindent A C++ user can also access a database relation (not just get a 
copy of it) using the following:

\ordinalg{
\> \> Relation *find\_db\_relation (char *db\_rel\_name, int arity);
}

Relations can be written on files using the following:
\ordinalg{
\> \> Relation::print\_on (File *file);
}
In later versions of CORAL we will provide primitives for reading facts
stored in a text file directly into a relation.

\subsection{Modifying Relations Using C++}

Relations can be viewed as sets of tuples.
Once a relation has been created (and possibly initialized),
the user may want to update the relation by inserting, deleting
or modifying tuples in a variety of ways.
Tuples can be inserted in a relation as follows:

\ordinalg{
\> \> int Relation::insert\_tuple (Tuple *tuple\_name);
}

Only database relations, and relations created by the C++ user can be 
updated in this fashion.  
The C++ user has no direct access to derived relations defined by
a declarative CORAL module.

We provide two iterative constructs for accessing the tuples of a relation
(the tuples are returned in an arbitrary order).
These are implemented as C++ macros:

\ordinalg{
\> \> FOR\_EACH\_TUPLE (Tuple *tuple, Relation *rel\_name) $\{ $ \\
\> \> \ldots \\
\> \> $\}$ END\_EACH\_TUPLE \\
\\
\> \> FOR\_EACH\_MATCHING\_TUPLE (Tuple *tuple, Relation *rel, 
        Tuple *matching\_tuple) $\{ $ \\
\> \> \ldots \\
\> \> $\}$ END\_EACH\_TUPLE 
}

\noindent Once a tuple in a relation is accessed using the iterative constructs,
it can be removed from the relation using the following:

\ordinalg{
\> \> int delete\_tuple (Tuple *tuple);
}

\noindent Similarly, once a tuple is accessed using the iterative constructs,
the user may modify it using the following construct:

\ordinalg{
\> \> int update\_tuple (Tuple *tuple, int arg\_position, Arg *new\_arg\_val);
}

The user may want to delete or modify a tuple provided that it satisfies 
certain conditions which the user may want to check using C++.
This would typically be done within a loop to iterate over those tuples
of a relation that match a given pattern (query) for the relation.
An example of this is shown in Figure~\ref{fig:imperative}.

\subsection{Defining Predicates Using C++ Procedures}

Often a user of declarative CORAL may want to define a predicate using 
C++ for reasons of efficiency, or otherwise.
For any such predicate, the user needs to specify the following
function which, given a (partially instantiated) tuple, 
returns the complete set of tuples that match the given query tuple.

\ordinalg{
\> \> Relation *pred\_name\_solver (Tuple *query\_tuple); \\
\> \> Tuple *pred\_name\_solver (Tuple *query\_tuple);
}

The latter form may be used when the query always has a single answer.
In addition, the declarative CORAL user has to specify that this 
function will be accessed to answer queries from within declarative CORAL.
This is done as follows:

\ordinalg{
\> \> int add\_builtin (char *pred\_name, Relation *pred\_name\_solver 
	(Tuple *));\\
\> \> int add\_builtin (char *pred\_name, Tuple *pred\_name\_solver (Tuple *));
}


\subsection{Arguments and Tuples}
\label{sec:lib}

To write C++ code that defines a predicate, or that modifies a CORAL
relation, we provide a library that allows the
C++ user to manipulate arguments and tuples.  This library
includes routines to examine as well as to construct arguments
and tuples.
An $n$-tuple can be viewed as a list of $n$ arguments, each of which is a
Herbrand term.
We first describe several constructors to create Herbrand terms, and then
describe how to create tuples of terms.
A Herbrand term $t$ is either a constant, or a variable, or of the form
$f (t_1, \ldots, t_m)$, where each $t_i$, $1 \leq i \leq m$, is a term,
and $f$ is an $m$-ary function symbol.

Any of the following can be used to create constant arguments (integers and
reals are treated as constants):

\ordinalg{
\> \> Arg *make\_arg (int i); \\
\> \> Arg *make\_arg (long i); \\
\> \> Arg *make\_arg (double n); \\
\> \> Arg *make\_arg (char *name);
}

In CORAL, variables are identified internally by numbers; names are only used
for printing purposes.
Any of the following can be used to create variables:
\ordinalg{
\> \> Arg *make\_var (int var\_num); \\
\> \> Arg *make\_var (char *print\_name, int var\_num);
}
\noindent A complex term (using a functor) can be created as follows:
\ordinalg{
\> \> Arg *make\_arg (char *func\_name, ArgList *args);
}
\noindent We also provide special syntax to create Lisp/Prolog style lists:
\ordinalg{
\> \> Arg *make\_cons (Arg *head, Arg *tail);
}

\noindent An $n$-ary list of terms can be created using the following:

\ordinalg{
\> \> ArgList *make\_arglist (n, Arg *a1, Arg *a2, ..., Arg *an); \\
\> \> ArgList *make\_arglist1 (Arg *a1); \\
\> \> ArgList *make\_arglist2 (Arg *a1, Arg *a2); \\
\> \> ArgList *make\_arglist3 (Arg *a1, Arg *a2, Arg *a3); \\
\> \> ArgList *make\_arglist4 (Arg *a1, Arg *a2, Arg *a3, Arg *a4); \\
\> \> ArgList *concat\_arglists (ArgList *alist1, ArgList *alist2);
}

\noindent A tuple can be created using any of the following:

\ordinalg{
\> \> Tuple tuple\_name (ArgList *alist); \\
\> \> Tuple *tuple\_name\_ptr = new Tuple (ArgList *alist);
}

\noindent We can refer to an argument of a tuple using the following 
member function:
\ordinalg{
\> \> Arg *\& Tuple::arg(int arg\_pos);
}
This function returns a reference to the argument (which is an $ Arg * $),
and so we can use the function as an operand of an expression,
or can assign to it the result of an expression.
Later versions of the system will allow named, and possibly typed,
attributes for tuples and relations.

We provide a number of functions to make tuples and terms accessible
to C++ code.

\ordinalg{
\> \> int is\_constant (Arg *a); \\
\> \> int is\_variable (Arg *a); \\
\> \> int is\_functor (Arg *a); \\
\\
\> \> int make\_int (Arg *a); \\
\> \> long make\_long (Arg *a); \\
\> \> double make\_double (Arg *a); \\
\> \> char *make\_string (Arg *a);
}

\noindent Arguments and tuples can be written on files using the following:
\ordinalg{
\> \> Arg::print\_on(File *file);\\
\> \> ArgList::print\_on(File *file);\\
\> \> Tuple::print\_on (File *file); 
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\vspace{-4mm}
\bibliographystyle{alpha}
\bibliography{/var/home/raghu/bib/moredb,/var/home/raghu/bib/divdbimp,/var/home/raghu/bib/
clp}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

