/* [11oct93] (air)

*/

%{
#include <string.h>
#include <strings.h>

#define TRUE 1
#define FALSE 0

/*
 * Scanner state shared between the rules and the helper functions.
 *
 * in_doc      - set by start_document(), cleared by the "</DOC>" rule.
 * in_text     - set by start_text(), cleared by the "</TEXT>" rule.
 * in_preamble - set by start_document(), cleared by start_text(); i.e. true
 *               for the part of a document that precedes its first <TEXT>.
 * first_para  - set by start_text(); consulted by start_paragraph_id().
 */
int in_doc = FALSE;
int in_text = FALSE;
int in_preamble = FALSE;
int first_para = FALSE;

/* Document id extracted from the most recent <DOCNO> line by print_docno(). */
char para_id[32];
/*
 * para_num - paragraph counter, reset by start_text().
 * text_num - <TEXT> section counter, reset by start_document() and
 *            incremented by start_text().
 */
int para_num, text_num;

/*
 * Prototypes for the action helpers.  They are defined after the rules but
 * called from inside yylex(), so without these the calls would rely on
 * implicit function declarations, which C99 and later do not allow.
 */
int start_document(char *string);
int start_paragraph_id(void);
int start_text(char *string);
int print_docno(char *string);
int print_token(char *string);
%}


/*
 * Named patterns used by the catch-all rules.
 *   STRING - despite the name, exactly ONE character that is not a space,
 *            tab or newline (there is no repetition operator).
 *   WSPACE - a run of one or more spaces/tabs.
 *   NL     - a run of one or more newlines.
 */
STRING              [^ \t\n]
WSPACE              [ \t]+
NL                  [\n]+

%%

	/* Rule summary (comment lines are indented so lex copies them as C):    */
	/*   <DOCNO>...</DOCNO> on one line -> print_docno(): echo it, keep id.  */
	/*   <DOC>                          -> start_document().                 */
	/*   <TEXT>                         -> start_text().                     */
	/*   </TEXT>  prints "</p>", then the tag, and clears in_text.           */
	/*   </DOC>   prints the tag and clears in_doc.                          */
	/*   three spaces at start of line  -> start_paragraph_id(); the spaces  */
	/*                                     themselves are not echoed.        */
	/*   any other single non-blank character, whitespace run or newline     */
	/*   run                            -> print_token().                    */

"<DOCNO>".+"</DOCNO>"          {print_docno(yytext);}
"<DOC>"                        {start_document(yytext);}
"<TEXT>"                       {start_text(yytext);}
"</TEXT>"                      {printf("</p>\n%s\n",yytext); in_text = FALSE;}
"</DOC>"                       {printf("%s\n",yytext); in_doc = FALSE;}
^"   "                         {start_paragraph_id();}

{STRING}                       |
{WSPACE}                       |
{NL}                           {print_token(yytext);}

%%


/*
 * start_document -- action for the "<DOC>" tag.
 *
 * Echoes the tag on a line of its own and resets the per-document state:
 * the scanner is now inside a document and inside its preamble, and the
 * <TEXT> section counter starts again from zero.
 *
 * string: the matched tag text (the rule passes yytext).
 * Returns 0 always; the calling rule ignores the value.
 */
int start_document(char *string)
{
  printf("%s\n", string);
  in_doc = TRUE;
  in_preamble = TRUE;
  text_num = 0;
  return 0;
}

/*
 * start_paragraph_id -- action for three spaces at the start of a line.
 *
 * As written this function does nothing but return 0: the unconditional
 * return on its first line makes every statement after it unreachable.
 *
 * The unreachable code would bump para_num, print "</p>" to close the
 * previous paragraph (skipped for the first paragraph of a <TEXT> section,
 * where it clears first_para instead), and then print an opening tag of the
 * form <p.ID-T.P> built from para_id, text_num and para_num.
 *
 * NOTE(review): the early return looks like a deliberate switch-off of
 * paragraph tagging, but that cannot be confirmed from this file -- verify
 * before removing either the return or the dead code.
 */
start_paragraph_id()
{
return(0);  /* everything below is dead code while this line is present */
  para_num++;
  if (!first_para) printf("</p>\n");
  else first_para = FALSE;
  printf("<p.%s-%d.%d>\n",para_id,text_num,para_num);
}

/*
 * start_text -- action for the "<TEXT>" tag.
 *
 * Echoes the tag (without a trailing newline) and switches the scanner from
 * preamble state to text state: in_text is set, in_preamble is cleared, the
 * <TEXT> section counter is advanced, and paragraph numbering restarts with
 * first_para set for the new section.
 *
 * string: the matched tag text (the rule passes yytext).
 * Returns 0 always; the calling rule ignores the value.
 */
int start_text(char *string)
{
  printf("%s", string);
  in_text = TRUE;
  in_preamble = FALSE;
  text_num++;
  first_para = TRUE;
  para_num = 0;
  return 0;
}

/*
 * print_docno -- action for a complete "<DOCNO> id </DOCNO>" line.
 *
 * Echoes the line unchanged (so the document number is preserved in the
 * output) and stores the id in para_id.  The id is taken to be everything
 * between the first and the last space of the matched text, so
 * "<DOCNO> ABC-1 </DOCNO>" yields "ABC-1".
 *
 * The id is copied straight out of the matched text; no temporary buffer is
 * allocated.  If the text does not contain two distinct spaces, para_id is
 * set to the empty string.  An id longer than para_id can hold is truncated
 * to fit.
 *
 * string: the matched text (the rule passes yytext); it is not modified.
 * Returns 0 always; the calling rule ignores the value.
 */
int print_docno(char *string)
{
  const char *first;
  const char *last;
  size_t len;

  printf("%s\n", string);  /* preserve the doc number for future reference */

  first = strchr(string, ' ');
  last = strrchr(string, ' ');
  if (first == NULL || first == last) {
    /* Fewer than two spaces: nothing is delimited, so there is no id. */
    para_id[0] = '\0';
    return 0;
  }

  first++;  /* step past the leading space onto the id itself */
  len = (size_t)(last - first);
  if (len >= sizeof para_id)
    len = sizeof para_id - 1;  /* leave room for the terminator */
  memcpy(para_id, first, len);
  para_id[len] = '\0';
  return 0;
}

/*
 * print_token -- action for the catch-all rules (single non-blank
 * characters, whitespace runs and newline runs).
 *
 * Echoes the token only while the scanner is in a document preamble and not
 * inside a <TEXT> section; in every other state the token is dropped.
 *
 * string: the token to echo (the rule passes yytext).  The parameter itself
 *         is printed, so the function does not depend on the scanner's
 *         global yytext.
 * Returns 0 always; the calling rule ignores the value.
 */
int print_token (char *string)
{
   if (in_preamble && !in_text) {
      printf("%s", string);
   }
   return 0;
}
 
/**/
