/* Memory allocation */

#include "params.h"
#include "gambit.h"
#include "struct.h"
#include "os.h"
#include "mem.h"
#include "strings.h"
#include "opcodes.h"
#include "stats.h"
#include "main.h"


/*---------------------------------------------------------------------------*/


void (*temp_cont)();
char *heap_area1, *heap_area2;
long random_seed;
long processor_id;
PSTATE_PTR processor_state[MAX_NB_PROC];



void init_system_mem1();


void init_system_mem( cont )
void (*cont)();
{

/*

This procedure allocates storage that is global to the system and used
by all processors.  This storage is subdivided into 3 areas:

  1 - system state
  2 - global table (contains Scheme global variables)
  3 - constant space (contains the code for procedures and constant objects)

The part containing the global variables is allocated on processor 0 (as
shared memory) and every processor get's a copy of the rest.  The block
is organized as follows:

                  _________
       / -0x8000 |    .    | \
      |          |    .    |  |
      |          |    .    |  | 6528 global variables (each var.
      |          |    .    |  | occupies 8 bytes, the first 4 are the
shared           |    .    |  | variable's value the next 4 are the jump
      | A6 --> 0 |    .    |  | address)
      |          |    .    |  |
      |          |    .    |  |
       \         |_________| /
                 |    .    | \
       /         |    .    |  | 6528/2 'global jump' trampolines (each occupies
      |          |    .    |  | 4 bytes and corresponds to 'jmp 0x7efe(A6)')
      |          |_________| /
      |   0x7f00 |    .    | \
      |          |    .    |  | 32 trap handler trampolines (each occupies 8
      |          |    .    |  | bytes and correspond to 'jmp adr')
copy             |_________| /
      |   0x8000 |    .    | \
      |          |    .    |  |
      |          |    .    |  | 'constants' area
      |          |    .    |  |
      \          |_________| /


*/

  temp_cont = cont;

  os_shared_copy_malloc8(
    (long)(ceiling8( sizeof(struct sstate_rec) ) +
           ((long)MAX_NB_GLOBALS)*sizeof(struct global_rec)),
    (long)(((long)MAX_NB_GLOBALS)*sizeof(short) +
           ((long)NB_TRAPS)*sizeof(struct trap_rec) +
           ceiling8( const_len ))
    , 0L
    , init_system_mem1 );
}


void init_system_mem1( const_area )
char *const_area;
{ long nb_processors;

  if (const_area == NULL)
  { os_warn( "Can't allocate constant area\n", 0L ); os_quit(); }

  nb_processors = os_nb_processors();

  if (remote) nb_processors = 1;

  if (nb_processors > MAX_NB_PROC)
  { nb_processors = MAX_NB_PROC;
    os_warn( "Maximum number of processors (%d) will be used\n",
             (long)MAX_NB_PROC );
  }

  { char *ptr1 = const_area + ceiling8( sizeof(struct sstate_rec) );
    char *ptr2 = ptr1 + ((long)MAX_NB_GLOBALS)*(sizeof(struct global_rec) + sizeof(short) );
    char *ptr3 = ptr2 + ((long)NB_TRAPS)*sizeof(struct trap_rec);
    long i;

    sstate = (SSTATE_PTR)const_area;
    sstate->globals    = (GLOBAL_PTR)ptr1;
    sstate->tramps     = (TRAMP_PTR)(ptr2-((long)MAX_NB_GLOBALS)*sizeof(short));
    sstate->traps      = (TRAP_PTR)ptr2;
    sstate->const_bot  = ptr3;
    sstate->const_bptr = ptr3;
    sstate->const_tptr = ptr3 + ceiling8( const_len );
    sstate->const_top  = ptr3 + ceiling8( const_len );
    sstate->nb_ofiles  = 0;

    /* init global variable table and global jump trampolines */

    for (i=0; i<(long)MAX_NB_GLOBALS; i++)
    { sstate->globals[i].value    = (long)SCM_unbound;
      sstate->globals[i].jump_adr = (long)&sstate->tramps[i];
      if (i & 1)
        sstate->tramps[i] = JMPA6_DISP_OP;
      else
        sstate->tramps[i] = 0x7efe; /* offset and opcode for moveq #-2,d7 */
    }
    sstate->tramps[((long)MAX_NB_GLOBALS)-1] = NOP_OP;

    temp_cont( nb_processors );
  }
}


long alloc_const_proc( len, obj )
long len;
SCM_obj *obj;
{ long len1 = len + 4;        /* length including header                */
  long len2 = ceiling8(len1); /* length including padding for alignment */
  char *temp = sstate->const_bptr;
  if (temp + len2 > sstate->const_tptr)
  { os_err = "Constant area overflow"; return 1; }
  sstate->const_bptr = temp + len2;
  *(short *)temp = 0x8000 + len;
  *obj = (SCM_obj)(temp + SCM_type_PROCEDURE);
  return 0;
}


long alloc_const_pair( obj )
SCM_obj *obj;
{ if (sstate->const_tptr-8 < sstate->const_bptr)
  { os_err = "Constant area overflow"; return 1; }
  sstate->const_tptr -= 8;
  *obj = (SCM_obj)(sstate->const_tptr + SCM_type_PAIR);
  return 0;
}


long alloc_const_subtyped( len, subtype, obj )
long len, subtype;
SCM_obj *obj;
{ long len1 = len + 4;        /* length including header                */
  long len2 = ceiling8(len1); /* length including padding for alignment */
  if (sstate->const_bptr+len2 > sstate->const_tptr)
  { os_err = "Constant area overflow"; return 1; }
  *obj = (SCM_obj)(sstate->const_bptr + SCM_type_SUBTYPED);
  *(long *)(sstate->const_bptr) = SCM_make_header( len, subtype );
  sstate->const_bptr += len2;
  return 0;
}


long alloc_const_vector( len, obj )
long len;
SCM_obj *obj;
{ return alloc_const_subtyped( len*sizeof(SCM_obj), (long)SCM_subtype_VECTOR, obj );
}


long alloc_const_string( str, obj )
char *str;
SCM_obj *obj;
{ SCM_obj string_adr;
  char *p = str;
  long len = 0;
  while (*(p++) != '\0') len++;
  if (alloc_const_subtyped( len+1, (long)SCM_subtype_STRING, &string_adr )) return 1;
  p = (char *)(string_adr - SCM_type_SUBTYPED + 4);
  while (*str != '\0') *(p++) = *(str++);
  *p = '\0'; /* so that C will understand this as a string */
  if ((((long)p) & 7) == 0) { *(long *)p = 0; *(long *)(p+4) = 0; }
  *obj = string_adr;
  *(long *)(string_adr-SCM_type_SUBTYPED) = SCM_make_header( len, SCM_subtype_STRING );
  return 0;
}


void define_c_proc( name, proc )
char *name;
void (*proc)();
{ SCM_obj proc_adr;
  short *code_ptr;
  char *str = c_id_to_symbol( name );
  if (str == NULL)
  { os_warn( "Can't convert C identifier to Scheme symbol\n", 0L ); os_quit(); }
  if (alloc_const_proc( 16L, &proc_adr ))
  { os_warn( "%s\n", (long)os_err ); os_quit(); }
  code_ptr = (short *)proc_adr;

  *(code_ptr++) = MOVE_L_IMM_A1_OP;      /* move.l #adr,a1    */
  *(void (**)())code_ptr = proc;  code_ptr += 2;
  *(code_ptr++) = JMPA6_DISP_OP;         /* jmp    C_CALL(a6) */
  *(code_ptr++) = table_offset( &sstate->traps[C_CALL_trap].jmp );
  *((SCM_obj *)code_ptr) = SCM_false;  code_ptr += 2;
  *((SCM_obj *)code_ptr) = SCM_int_to_obj( 2 );

  if (set_global( str, proc_adr )) { os_warn( "%s\n", (long)os_err ); os_quit(); }
}


/*---------------------------------------------------------------------------*/


void init_processor_mem1();
void init_processor_mem2();
void init_processor_mem3();
void init_processor_mem4();


void init_processor_mem( cont )
void (*cont)();
{

/*

This procedure allocates storage associated with each processor.
Specifically, there are 8 areas of storage per processor:

  1 - table used to store events
  2 - local heap (for storing non-Scheme objects)
  3 - counters used for statistics gathering
  4 - code area for the emulation of M68020 and M68881 instructions
  5 - processor state
  6 - Scheme heap (where the processor allocates most Scheme objects)
  7 - counters used for profiling (if requested)
  8 - the stack (and lazy-task queue and dynamic-binding queue)
      note: the lazy-task queue could be in private-memory for the message
      passing steal protocol but, to test the alternative shared memory steal
      protocol, it is put in shared memory (on the butterfly this doesn't
      affect performance anyway)

The processor state is a structure that contains a number of fields that
describe a given processor (i.e. processor number, heap location,
stack location, etc...).

The Scheme heap is a block of memory containing two equaly sized sub-heaps
each starting on an octuple address:

processor.heap_bot       _________
          ------------> |    .    | \
                        |    .    |  | sub-heap 1 (first to be used)
                        |    .    |  |
                        |_________| /
                        |    .    | \
                        |    .    |  | sub-heap 2
                        |    .    |  |
processor.heap_top      |_________| /
          ------------>

*/

  temp_cont = cont;

  init_stats();

  processor_id = 0;
  random_seed = 0;

  init_processor_mem1();
}


void init_processor_mem1()
{ long prof_len = ceiling8(sstate->profiling ? ((sizeof(short) * ceiling8( const_len )) >> PROF_SHIFT) : 0);

  os_shared_malloc8( (remote_stack ? 0 : (2*stack_len)) +
                     ceiling8( ((long)MAX_NB_EVENTS)*sizeof(long) ) +
                     ((long)LOCAL_HEAP_LENGTH_IN_K)*K +
                     ceiling8( ((long)MAX_NB_STATS)*sizeof(long) ) +
                     ceiling8( ((long)MAX_EMUL_CODE_LENGTH_IN_K)*K ) +
                     ceiling8( sizeof(struct pstate_rec) ) +
                     prof_len +
                     (remote_heap ? 0 : heap_len),
                     processor_id,
                     init_processor_mem2 );
}


void init_processor_mem2( ptr )
char *ptr;
{ if (ptr == NULL)
  { os_warn( "Can't allocate heap area\n", 0L ); os_quit(); }

  heap_area1 = ptr;

  if (remote_heap)
    os_shared_malloc8( heap_len, 1L, init_processor_mem3 );
  else
    init_processor_mem3( heap_area1 );
}


void init_processor_mem3( ptr )
char *ptr;
{ if (ptr == NULL)
  { os_warn( "Can't allocate remote heap\n", 0L ); os_quit(); }

  heap_area2 = ptr;

  if (remote_stack)
    os_shared_malloc8( 2*stack_len, 1L, init_processor_mem4 );
  else
    init_processor_mem4( heap_area1 );
}


void init_processor_mem4( ptr )
char *ptr;
{ if (ptr == NULL)
  { os_warn( "Can't allocate remote stack\n", 0L ); os_quit(); }

  { long prof_len = ceiling8(sstate->profiling ? ((sizeof(short) * ceiling8( const_len )) >> PROF_SHIFT) : 0);

    char *ptr0 = heap_area1 + (remote_stack ? 0 : (2*stack_len));
    char *ptr1 = ptr0 + ceiling8( ((long)MAX_NB_EVENTS)*sizeof(long) );
    char *ptr2 = ptr1 + ((long)LOCAL_HEAP_LENGTH_IN_K)*K;
    char *ptr3 = ptr2 + ceiling8( ((long)MAX_NB_STATS)*sizeof(long) );
    char *ptr4 = ptr3 + ceiling8( ((long)MAX_EMUL_CODE_LENGTH_IN_K)*K );
    char *ptr5 = ptr4 + ceiling8( sizeof(struct pstate_rec) );
    char *ptr6 = (remote_heap ? heap_area2 : (ptr5+prof_len));
    PSTATE_PTR p = (PSTATE_PTR)ptr4;
    long i;

    processor_state[processor_id] = p;

    p->id                = SCM_int_to_obj(processor_id);
    p->nb_processors     = SCM_int_to_obj(nb_processors);
    p->stats_counters    = (long *)ptr2;
    p->local_heap_bot    = ptr1;
    p->local_heap_top    = ptr2;

    p->stack_bot         = (long *)ptr;
    p->stack_top         = (long *)(((char *)p->stack_bot) + stack_len);
    p->q_bot             = (long **)p->stack_top;
    p->q_top             = (long **)(((char *)p->stack_top) + stack_len);
    p->stack_max_margin  = ((stack_len-STACK_ALLOCATION_FUDGE*4)/8) & -8L;
    p->stack_margin      = p->stack_max_margin;

    p->heap_bot          = ptr6;
    p->heap_top          = ptr6 + heap_len;
    p->heap_mid          = ptr6 + heap_len/2;
    p->heap_max_margin   = ((heap_len/2-HEAP_ALLOCATION_FUDGE*4)/16) & -8L;
    p->heap_margin       = p->heap_max_margin;
    p->elog_bot          = (long *)ptr0;
    p->elog_top          = ((long *)ptr1)-2;
    p->prof_bot          = (short *)ptr5;
    p->prof_top          = (short *)(ptr5+prof_len);
    p->emul_code_bot     = ptr3;
    p->emul_code_top     = ptr4;

    p->intr_flag         = -1;
    p->heap_old          = p->heap_mid;
    p->heap_lim          = p->heap_bot + p->heap_margin + ((long)HEAP_ALLOCATION_FUDGE)*4;
    p->heap_ptr          = p->heap_mid;
    p->closure_lim       = p->heap_ptr;
    p->closure_ptr       = p->heap_ptr;
    p->workq_lockO       = 0;  /* work queue initially unlocked */
    p->workq_lockV       = 0;
    p->workq_tail        = SCM_null;
    p->workq_head        = SCM_null;
    p->steal_scan        = 0;
    p->elog_ptr          = p->elog_top;
    p->elog_top[0]       = 0;
    p->elog_top[1]       = 0;
    p->emul_code_ptr     = p->emul_code_bot;
    p->local_heap_ptr    = p->local_heap_bot;

    p->steal_lockO       = 0;
    p->steal_lockV       = 0;

    p->stack_ptr         = p->stack_top;
    p->ltq_tail          = p->q_bot;
    *(p->ltq_tail++)     = p->stack_ptr;
    p->ltq_head          = p->ltq_tail;
    p->deq_tail          = p->q_top;
    *(--p->deq_tail)     = p->stack_ptr;
    p->deq_head          = p->deq_tail;

    { long **z = p->ltq_tail;
      while (z != p->deq_tail) *z++ = NULL;
    }

    p->response          = 0;
    p->thief             = 0;

    p->intr_other        = 0;
    p->intr_barrier      = 0;
    p->intr_timer        = 0;
    p->intr_user         = 0;

    p->sync1             = -2;
    p->sync2             = -2;

    p->count1            = 0;
    p->count2            = 0;

    for (i=(sizeof(p->processor_storage)/sizeof(SCM_obj))-1; i>=0; i--)
      p->processor_storage[i] = 0;
  }

  processor_id++;

  if (processor_id<nb_processors)
    init_processor_mem1();
  else
  { long i, j, index;

    for (i=0; i<nb_processors; i++)  /* setup table of processors on each proc */
    { PSTATE_PTR *p1 = processor_state[i]->ps, *p2 = processor_state;
      PSTATE_PTR *p3 = processor_state[i]->steal_ps;
      for (j=0; j<nb_processors; j++) *(p1++) = *(p2++);
      *(p3++) = processor_state[i];
      for (j=1; j<nb_processors; j++) *(p3++) = processor_state[(i+j)%nb_processors];

      for (j=1; j<nb_processors; j++)  /* shuffle to randomize steal pattern */
      { long k = random_seed % (nb_processors-j);
        PSTATE_PTR temp = *(--p3);
        *p3 = *(p3-k);
        *(p3-k) = temp;
        random_seed = (random_seed * 7001 + 1) & 0x7fffffffL;
      }
    }

    pstate = processor_state[0];
  
    if (alloc_vector( (long)SYMBOL_TABLE_LENGTH, &sstate->globals[SYMBOL_TABLE].value )) os_quit();

    for (i=0; i<SYMBOL_TABLE_LENGTH; i++)
      SCM_obj_to_vect(sstate->globals[SYMBOL_TABLE].value)[i] = SCM_null;
    sstate->globals[GLOBAL_VAR_COUNT].value = SCM_int_to_obj( 0 );

    if (alloc_global( "##symbol-table", &index ) ||               /* variable # 0 */
        alloc_global( "##global-var-count", &index )) os_quit();  /* variable # 1 */

    temp_cont();
  }
}


long alloc_pair( obj )
SCM_obj *obj;
{ if (pstate->heap_ptr-8 < pstate->heap_lim)
  { os_err = "Heap overflow"; return 1; }
  pstate->heap_ptr -= 8;
  *obj = (SCM_obj)(pstate->heap_ptr + SCM_type_PAIR);
  return 0;
}


long alloc_subtyped( len, subtype, obj )
long len, subtype;
SCM_obj *obj;
{ long len1 = len + 4;        /* length including header                */
  long len2 = ceiling8(len1); /* length including padding for alignment */
  if (pstate->heap_ptr-len2 < pstate->heap_lim)
  { os_err = "Heap overflow"; return 1; }
  pstate->heap_ptr -= len2;
  *(long *)(pstate->heap_ptr) = SCM_make_header( len, subtype );
  *obj = (SCM_obj)(pstate->heap_ptr + SCM_type_SUBTYPED);
  return 0;
}


long alloc_vector( len, obj )
long len;
SCM_obj *obj;
{ return alloc_subtyped( len*sizeof(SCM_obj), (long)SCM_subtype_VECTOR, obj );
}


long alloc_symbol( name, obj )
char *name;
SCM_obj *obj;
{ SCM_obj probe, sym, sym_name;
  long len = 0, h = 0;
  while (name[len] != '\0')
    h = ((h<<8)+(unsigned)name[len++]) % (long)SYMBOL_TABLE_LENGTH;
  probe = SCM_obj_to_vect(sstate->globals[SYMBOL_TABLE].value)[h];
  while (probe != SCM_null)
  { sym = *(SCM_obj *)(probe-SCM_type_PAIR+PAIR_CAR*sizeof(SCM_obj));
    sym_name = SCM_obj_to_vect(sym)[SYMBOL_NAME];
    if (SCM_length( sym_name ) == len)
    { long i = len;
      char *str = SCM_obj_to_str(sym_name);
      while (i > 0) { i--; if (str[i] != name[i]) goto not_found; }
      *obj = sym;
      return 0;
    }
    not_found:
    probe = *(SCM_obj *)(probe-SCM_type_PAIR+PAIR_CDR*sizeof(SCM_obj));
  }

  if (alloc_subtyped( ((long)SYMBOL_SIZE)*sizeof(SCM_obj), (long)SCM_subtype_SYMBOL, &sym )) return 1;
  if (alloc_const_string( name, &SCM_obj_to_vect(sym)[SYMBOL_NAME])) return 1;
  SCM_obj_to_vect(sym)[SYMBOL_PLIST]  = SCM_null;
  SCM_obj_to_vect(sym)[SYMBOL_GLOBAL] = SCM_false;
  if (alloc_pair( &probe )) return 1;
  *(SCM_obj *)(probe-SCM_type_PAIR+PAIR_CAR*sizeof(SCM_obj)) = sym;
  *(SCM_obj *)(probe-SCM_type_PAIR+PAIR_CDR*sizeof(SCM_obj)) =
    SCM_obj_to_vect(sstate->globals[SYMBOL_TABLE].value)[h];
  SCM_obj_to_vect(sstate->globals[SYMBOL_TABLE].value)[h] = probe;

  *obj = sym;
  return 0;
}


long alloc_global( name, index )
char *name;
long *index;
{ SCM_obj sym;
  if (alloc_symbol( name, &sym )) return 1;
  return alloc_global_from_symbol( sym, index );
}


long alloc_global_from_symbol( sym, index )
SCM_obj sym;
long *index;
{ if (SCM_obj_to_vect(sym)[SYMBOL_GLOBAL] == SCM_false) /* var allocated? */
  { long i = SCM_obj_to_int( sstate->globals[GLOBAL_VAR_COUNT].value );
    if (i >= MAX_NB_GLOBALS)
    { os_err = "Global variable table overflow"; return 1; }
    SCM_obj_to_vect(sym)[SYMBOL_GLOBAL] = SCM_int_to_obj(i);
    sstate->globals[GLOBAL_VAR_COUNT].value = SCM_int_to_obj(i+1);
    *index = i;
  }
  else
    *index = SCM_obj_to_int(SCM_obj_to_vect(sym)[SYMBOL_GLOBAL]);
  return 0;
}


long set_global( name, value )
char *name;
SCM_obj value;
{ long index;
  if (alloc_global( name, &index )) return 1;
  sstate->globals[index].value = value;
  return 0;
}


char *local_malloc8( len )
long len;
{ char *temp1 = pstate->local_heap_ptr;
  char *temp2 = temp1 + ceiling8( len );
  if (temp2 > pstate->local_heap_top) return NULL;
  pstate->local_heap_ptr = temp2;
  return temp1;
}


char *local_mark()
{ return pstate->local_heap_ptr;
}


void local_release( mark )
char *mark;
{ pstate->local_heap_ptr = mark;
}


/*---------------------------------------------------------------------------*/
