/* $Header: /home/panda/pg/bevan/progs/elk/lib/RCS/gnu_regexp.c,v 1.5 91/04/02 14:32:25 bevan Exp $ */

/*+c
** DESCRIPTION
**   Regular Expressions for ELK
** These are based on the GNU regular expression code
** The code is written in K&R C to maintain compatibility with the rest of ELK
**
** USE:
**   To avoid name clashes, everything is prefixed by gnu.  This will help
** if you add a different regexp package, such as Henry Spencer's.
** 
** gnu:regexp
**   A type that represents gnu regular expressions.
**
** (gnu:make-regexp str)
**   Generate a regulare expression from the given string.  Does not return
**   if there is an error in the string.
**
** (gnu:regexp? obj)
**   Is the object a GNU regular expression?
**
** (gnu:regexp-exec regexp str start)
**   Apply the GNU regular expression `regexp' to the string `str' starting
**   at position `start'.  If the match succeeds it returns a gnu:regexp-match
**   It returns #f otherwise.
**
** gnu:regexp-match
**   A type that represents regular expression matches.
**
** (gnu:regexp-match? obj)
**   Is the object a GNU regular expression match.
**
** (gnu:regexp-start regexp-match match-number)
**   Returns the start position of the match denoted by `match-number'
**   The whole regexp is 0.  Each further number represents positions
**   enclosed by \\(\\) sections.
**
** (gnu:regexp-end regexp-match match-number)
**   Returns the end position of the match denoted by `match-number'
**   See the above.
**
**  Note the above produce values such that they can directly be used by
** substring 
**
** The interface to the regexp code is a mixture of the Emacs style and that to
** the regexp package in Python.  This has been done so that there is a low
** level on which packages to emulate either of the above could be written.
** For example one nice feature of Python's regexp is that it returns a list
** of the match positions.  This can easily be emulated by consing up a list
** from a gnu:match
**
** EXAMPLE
**
**  (define scheme-define-matcher
**    (gnu:make-regexp ""^[ \t]*(define[ \t]+(?\\([---A-Za-z0-9:?]+\\)[ \t]*"))
**
**  This creates a regexp that will match most of the scheme function names
**  I use. (Note it doesn't match symbolic ones like (define (* a b) ...))
**
**  (define str "(define (some-scheme-function a b) ...)")
**
**  (define match (gnu:regexp-exec scheme-define-matcher str 0))
**
**  (if (gnu:regexp-match? match)
**      (let ((name (substring str
**                             (gnu:regexp-start match 1)
**                             (gnu:regexp-end match 1))))
**        (display name))
**      (error 'foo "couldn't find a match"))
**
**  should output
**
**    some-scheme-function
**
*/


#include <scheme.h>

/* Note that a .c file is being included here.  This is so that a single .o
** file is created rather than one for this file and one for the GNU regex
** code.  The reason for not wanting two .o files is that I can't think how to
** reliably make sure that the GNU regex .o is loaded in before this .o (One
** solution would be to add an init section with a Provide in it to it, but I'd
** rather not do that)
*/
#include "gnu_regex.c"

/* Placeholders for the Gnu_Regexp and Gnu_RegexpMatch types. */
static int T_Gnu_Regexp;
static int T_Gnu_RegexpMatch;

/* A regular expression is represented as a pointer to 
** the compiled regular expression pattern.
** Is the nothing field really necessary?
*/
struct S_Gnu_Regexp
{
  Object nothing;
  struct re_pattern_buffer compiled_pattern;
  Object the_pattern;
};

/* A the result of a regular expression match is a structure
** storing the positions of the match.
** Is the nothing field really necessary ?
*/
struct S_Gnu_RegexpMatch
{
  Object nothing;
  struct re_registers matches;
};

/* Convert from a generic ELK pointer to GNUREGEXPs and GNUREGEXPMATCHs. */
#define GNUREGEXP(obj)	((struct S_Gnu_Regexp *)POINTER(obj))
#define GNUREGEXPMATCH(obj)	((struct S_Gnu_RegexpMatch *)POINTER(obj))

/* The following set of functions are the standard ones you have to define
** inorder to create a type for ELK.
*/

/* Two Gnu_Regexps are eqv? if they share the strings that they are a pattern of */

static int Gnu_Regexp_Eqv(a, b)
  Object a, b;
{
  return GNUREGEXP(a)->the_pattern == GNUREGEXP(b)->the_pattern;
}

/* Two Gnu_Regexps are equal? if they represent the same pattern. */

static int Gnu_Regexp_Equal(a, b)
  Object a, b;
{
  return General_Strcmp(GNUREGEXP(a)->the_pattern, GNUREGEXP(b)->the_pattern, 0) == 0;
}

static void Gnu_Regexp_Print(regexp, port, raw, depth, len)
  Object regexp, port;
  int raw, depth, len;
{
  Printf(port, "#[gnu:regexp ");
  Pr_String(port, GNUREGEXP(regexp)->the_pattern, 0);
  Printf(port, "]");
}

static void Gnu_Regexp_Visit(x, f)
  Object *x;
  void (*f)();
{
  struct S_Gnu_Regexp *p= GNUREGEXP(*x);
  (*f)(&(p->the_pattern));
}

static Object P_Gnu_Regexpp(x)
  Object x;
{
  return TYPE(x) == T_Gnu_Regexp ? True : False;
}

static int Gnu_RegexpMatch_Eqv(a, b)
  Object a, b;
{
  return EQ(a, b);
}

/* There is not good way to tell if two Gnu_RegexpMatches are equal rather than
** eqv, so just use the eqv definition.
*/
static int Gnu_RegexpMatch_Equal(a, b)
  Object a, b;
{
  return EQ(a, b);
}

static void Gnu_RegexpMatch_Print(regexpm, port, raw, depth, len)
  Object regexpm, port;
  int raw, depth, len;
{
  Printf(port, "#[gnu:regexp-match %lu]", POINTER(regexpm));
}

static Object P_Gnu_RegexpMatchp(x)
  Object x;
{
  return TYPE(x) == T_Gnu_RegexpMatch ? True : False;
}

/* End of standard functions */


/* Given a string, it compiles it into a regular expression. */
/* Is the Link + Unlink stuff necessary ? */

static Object P_Make_Gnu_Regexp(str)
  Object str;
{
  Object regexp;
  char *error;
  GC_Node2;

  regexp= Null;
  GC_Link2(str, regexp);
  Check_Type(str, T_String);
  regexp= Alloc_Object(sizeof(struct S_Gnu_Regexp), T_Gnu_Regexp, 0);
  GNUREGEXP(regexp)->nothing= Null;
  GNUREGEXP(regexp)->the_pattern= str;
  /* the value 40 is a arbitrary initial buffer size */
  GNUREGEXP(regexp)->compiled_pattern.allocated= 40;
  GNUREGEXP(regexp)->compiled_pattern.buffer= Safe_Malloc(40);
  GNUREGEXP(regexp)->compiled_pattern.fastmap= NULL;
  GNUREGEXP(regexp)->compiled_pattern.translate= NULL;
  error= re_compile_pattern(STRING(str)->data, STRING(str)->size, &GNUREGEXP(regexp)->compiled_pattern);
  GC_Unlink;
  if (error != (char *)0)
    Primitive_Error(error);
  return regexp;
}

static Object P_Gnu_Regexp_exec(regexp, str, start)
  Object regexp, str, start;
{
  int intStart;
  int errorCode;
  Object result;
  GC_Node4;

  result= Null;
  GC_Link4(regexp, str, start, result);
  Check_Type(regexp, T_Gnu_Regexp);
  Check_Type(str, T_String);
  result= Alloc_Object(sizeof(struct S_Gnu_RegexpMatch), T_Gnu_RegexpMatch, 0);
  GNUREGEXPMATCH(result)->nothing= Null;
  intStart= Get_Integer(start);
  errorCode= re_match(&GNUREGEXP(regexp)->compiled_pattern, STRING(str)->data,
		      STRING(str)->size, intStart, &GNUREGEXPMATCH(result)->matches);
  GC_Unlink;
  if (errorCode == -2)
    Primitive_Error("Gnu_Regexp Stack Overflow");
  return (errorCode == -1) ? False : result;
}

/* Return the start position of a particular regular expression match. */

static Object P_Gnu_RegexpMatch_Start(regexp_match, match_number)
  Object regexp_match, match_number;
{
  int int_match_number;
  Check_Type(regexp_match, T_Gnu_RegexpMatch);
  int_match_number= Get_Integer(match_number);
  if (int_match_number >= RE_NREGS || int_match_number < 0)
    Primitive_Error("Match number not in range ~s", match_number);
  return Make_Integer(GNUREGEXPMATCH(regexp_match)->matches.start[int_match_number]);
}

/* Return the end position of a particular regular expression match. */

static Object P_Gnu_RegexpMatch_End(regexp_match, match_number)
  Object regexp_match, match_number;
{
  int int_match_number;
  Check_Type(regexp_match, T_Gnu_RegexpMatch);
  int_match_number= Get_Integer(match_number);
  if (int_match_number >= RE_NREGS || int_match_number < 0)
    Primitive_Error("Match number not in range ~s", match_number);
  return Make_Integer(GNUREGEXPMATCH(regexp_match)->matches.end[int_match_number]);
}


/* Initialise the Gnu_Regexp extensions. */

void init_gnu_regexp()
{
  /* Define the Gnu_Regexp type */
  T_Gnu_Regexp= Define_Type(
    0,
    "gnu:regexp",
    NOFUNC,
    sizeof(struct S_Gnu_Regexp),
    Gnu_Regexp_Eqv,
    Gnu_Regexp_Equal,
    Gnu_Regexp_Print,
    Gnu_Regexp_Visit
  );
  Define_Primitive(P_Make_Gnu_Regexp, "gnu:make-regexp", 1, 1, EVAL);
  Define_Primitive(P_Gnu_Regexpp, "gnu:regexp?", 1, 1, EVAL);
  Define_Primitive(P_Gnu_Regexp_exec, "gnu:regexp-exec", 3, 3, EVAL);

  /* Define the Gnu_RegexpMatch type
  ** Notice that there is no public constructor for this type.
  ** The only way a Gnu_RegexpMatch can be created is as the result
  ** of a Gnu_Regexp-exec call
  */
  T_Gnu_RegexpMatch= Define_Type(
    0,
    "gnu:regexp-match",
    NOFUNC,
    sizeof(struct S_Gnu_RegexpMatch), 
    Gnu_RegexpMatch_Eqv,
    Gnu_RegexpMatch_Equal,
    Gnu_RegexpMatch_Print,
    NOFUNC
  );
  Define_Primitive(P_Gnu_RegexpMatchp, "gnu:regexp-match?", 1, 1, EVAL);
  Define_Primitive(P_Gnu_RegexpMatch_Start, "gnu:regexp-start", 2, 2, EVAL);
  Define_Primitive(P_Gnu_RegexpMatch_End, "gnu:regexp-end", 2, 2, EVAL);
  P_Provide(Intern("gnu_regexp.o"));
}
