/*
 * PCN Abstract Machine Emulator
 * Authors:     Steve Tuecke and Ian Foster
 *              Argonne National Laboratory
 *
 * Please see the DISCLAIMER file in the top level directory of the
 * distribution regarding the provisions under which this software
 * is distributed.
 *
 * gc.c - garbage collector, based on Morris algorithm
 *
 * This garbage collection algorithm is described in the paper:
 *
 *	"A Time- and Space- Efficient Garbage Compaction Algorithm"
 *	F. Lockwood Morris
 *	Communications of the ACM
 *	August 1978, Volume 21, Number 8
 */

#include	"pcn.h"

static	cell_t *heap_last_used;	/* last used cell on heap */
static	u_int_t	cells_in_use;	/* number of cells in use on the heap */
static	int_t	structure_ptr_offset;	/* offset between _p_structure*ptr's */

static	void		mark_and_reverse();
static	void		m_and_r_proc_record();
static	void		m_and_r_process_queue();
static	void		m_and_r_suspension_queue();
static	void		m_and_r_irt();
static	void		m_and_r_current_proc();
static	void		m_and_r_pointer();
static	void		backward_scan();
static	void		forward_scan();


/*
 * _p_collect_garbage()
 *
 * Run the garbage collector.  Keep running the gc (while sending
 * out MSG_COLLECT messages between each try) until there are at least
 * 'min_collect' cells free on the heap.  If 'min_collect' is <0, then
 * use the various _p_heap* variables to determine if enough has
 * been collected.
 */
void _p_collect_garbage(min_collect)
int_t min_collect;
{
    bool_t done_with_gc = 0;
#ifdef DEBUG
    u_int_t old_size, new_size;
#endif /* DEBUG */
#ifdef GAUGE
    gauge_timer gc_start, gc_stop;

    _p_gauge_stats.gc_calls++;
    TIMER(gc_start);
#endif /* GAUGE */

    /*
     * Loop until we have collected enough space.  This is only
     * useful in a multiprocessing situation, since we can ask other nodes
     * to gc if we don't collect enough, and that might free up more for
     * this node.
     */
    while (!done_with_gc)
    {
	/*
	 * Check for heap overflow -- a bad thing...
	 */
	if (_p_heap_ptr > _p_heap_hard_top)
	{
	    fprintf(_p_stdout, "(%lu) GC: _p_heap_bottom=%lx, _p_heap_ptr=%lx, _p_heap_hard_top=%lx\n",
		    (unsigned long) _p_my_id, (unsigned long) _p_heap_bottom,
		    (unsigned long) _p_heap_ptr,
		    (unsigned long) _p_heap_hard_top);
	    fflush(_p_stdout);
	    _p_fatal_error("Heap overflow!  This should not happen!");
	}
	
	heap_last_used = _p_heap_ptr - 1;

#ifdef DEBUG
	if (GCDebug(2))
	{
	    old_size = _p_heap_ptr - _p_heap_bottom;
	    fprintf(_p_stdout, "(%lu,%lu) GC: Starting garbage collection\n",
		    (unsigned long) _p_my_id, (unsigned long) _p_reduction);
	    fflush(_p_stdout);
	}
	if (GCDebug(4))
	{
	    fprintf(_p_stdout, "(%lu) GC: _p_heap_bottom=%lx, heap_last_used=%lx, _p_heap_hard_top=%lx\n",
		    (unsigned long) _p_my_id, (unsigned long) _p_heap_bottom,
		    (unsigned long) heap_last_used,
		    (unsigned long) _p_heap_hard_top);
	    fflush(_p_stdout);
	}
#endif /* DEBUG */

#ifdef PDB
	if (GCDebug(5))
	{
	    fprintf(_p_stdout, "(%lu) GC:\t Process Queues - BEFORE\n",
		    (unsigned long) _p_my_id);
	    _pdb_print_all_processes();
	    fprintf(_p_stdout, "(%lu) GC:\t End of Process Queues - BEFORE\n",
		    (unsigned long) _p_my_id);
	    fflush(_p_stdout);
	}
#endif /* PDB */

	/*******************************************************
	 *
	 * This is the real garbage collection
	 *
	 *******************************************************/
#ifdef PARALLEL
        if (_p_cancels > 0)
        {
            /*
             * Send off cancels of any rref's that were defined since
             * the last gc, and any value notes that were discarded
	     * by utils.c:process_susp_queue().
             */
            _p_send_cancels();
        }
#endif
	structure_ptr_offset = -1;
	mark_and_reverse();
	backward_scan();
#ifdef PARALLEL
	if (_p_cancels > 0)
	{
	    /*
	     * Send off cancels of any orphaned rref or value_notes
	     * found during this gc.
	     */
	    _p_send_cancels();
	}
#endif 	
	forward_scan();
	if (structure_ptr_offset >= 0)
	    _p_structure_ptr = _p_structure_start_ptr + structure_ptr_offset;
	
#ifdef DEBUG
	/*
	ZeroOutMemory(_p_heap_ptr,
		      (_p_heap_hard_top - _p_heap_ptr) * CELL_SIZE));
	*/
#endif /* DEBUG */
	
#ifdef PDB
	if (GCDebug(5))
	{
	    fprintf(_p_stdout, "(%lu) GC:\t Process Queues - AFTER\n",
		    (unsigned long) _p_my_id);
	    _pdb_print_all_processes();
	    fprintf(_p_stdout, "(%lu) GC:\t End of Process Queues - AFTER\n",
		    (unsigned long) _p_my_id);
	    fflush(_p_stdout);
	}
#endif /* PDB */
	
#ifdef DEBUG	
	if (GCDebug(2))
	{
	    new_size = _p_heap_ptr - _p_heap_bottom;
	    fprintf(_p_stdout,"(%lu) GC:\tspace size = %lu\n",
		    (unsigned long) _p_my_id,
		    (unsigned long) (_p_heap_hard_top - _p_heap_bottom + 1));
	    fprintf(_p_stdout,"(%lu) GC:\t  old size = %lu\n",
		    (unsigned long) _p_my_id, (unsigned long) old_size);
	    fprintf(_p_stdout,"(%lu) GC:\t  new size = %lu\n",
		    (unsigned long) _p_my_id, (unsigned long) new_size);
	    fprintf(_p_stdout,"(%lu) GC:\t collected = %lu\n",
		    (unsigned long) _p_my_id,
		    (unsigned long) (old_size - new_size));
	    fprintf(_p_stdout,"(%lu) GC: Completed garbage collection\n",
		    (unsigned long) _p_my_id);
	    fflush(_p_stdout);
	}
#endif /* DEBUG */
    
	/*
	 * See if we collected enough to proceed...
	 */
#ifndef NO_VIRTUAL_MEMORY

	if (   ((_p_heap_ptr + min_collect) >= _p_heap_hard_top)
	    || ((_p_heap_ptr + min_collect) < _p_heap_ptr) /*check wraparound*/
	    || ((_p_heap_ptr + _p_heap_inc_proximity) >= _p_heap_hard_top) )
	{
	    /*
	     * The heap is not big enough.  So grow it.
	     *
	     * There is no need to add CANCEL_SIZE into the new_heap_size,
	     * since it is already accounted for by the difference
	     * between _p_heap_size and _p_heap_hard_top-_p_heap_bottom.
	     */
	    u_int_t new_heap_size = _p_heap_size + min_collect;
	    if (new_heap_size > _p_heap_hiwat)
	    {
		char buf[256];
		sprintf(buf, "Out of heap space and cannot expand heap anymore! (The current heap high water mark is %lu cells.)\n    Try running with a larger high water mark.  (See the -heap_hiwat flag on the runtime system)",
			(unsigned long) _p_heap_hiwat);
		_p_fatal_error(buf);
	    }
	    new_heap_size = MIN(new_heap_size + _p_heap_increment,
				_p_heap_hiwat);

	    _p_grow_heap(new_heap_size);
	}
	
	/*
	 * Now calculate the new _p_heap_gc_top, based on the various
	 * _p_heap_* constraints.
	 */
	if (_p_heap_free_after_gc == 0)
	    _p_heap_gc_top = _p_heap_hard_top - _p_gc_slack;
	else
	    _p_heap_gc_top = MIN(_p_heap_hard_top - _p_gc_slack,
				 MAX(_p_heap_bottom + _p_heap_lowat,
				     _p_heap_ptr + _p_heap_free_after_gc));
	done_with_gc = 1;
	
#else  /* !NO_VIRTUAL_MEMORY */
	
	/*
	 * Check for enough space and watch for wraparound, in case
	 * min_collect is very large.
	 */
	if (   ((_p_heap_ptr + min_collect) < _p_heap_hard_top)
	    && ((_p_heap_ptr + min_collect) >= _p_heap_ptr) )
	{
	    done_with_gc = 1;
	}
	else
	{
#ifdef PARALLEL	    
	    if(_p_multiprocessing)
	    {
		fprintf(_p_stdout,
			"(%lu) Out of heap space on node %lu! (Current heap size is %lu kcells.)\n    Trying to recover by broadcasting a gc request, but it doesn't look good...\n    Try running with a larger heap. (See the -heap_size flag on the runtime system)",
			(unsigned long) _p_my_id, (unsigned long) _p_my_id,
			(unsigned long) _p_heap_size);
		fflush(_p_stdout);
		_p_send_collect();
		_p_process_messages(RCV_COLLECT);
	    }
	    else
	    {
#endif /* PARALLEL */
		char buf[256];
		sprintf(buf, "Out of heap space! (Current heap size is %lu kcells.)\n    Try running with a larger heap.  (See the -heap_size flag on the runtime system)",
			(unsigned long) _p_heap_size);
		_p_fatal_error(buf);
#ifdef PARALLEL	    
	    }
#endif /* PARALLEL */
	}
	
#endif /* !NO_VIRTUAL_MEMORY */
    
    }
    
#ifdef GAUGE
    TIMER(gc_stop);
    lapse_time(gc_start, gc_stop, gc_stop);
    timer_add(gc_stop, _p_gauge_stats.gc_time, _p_gauge_stats.gc_time);
#endif /* GAUGE */
} /* _p_collect_garbage() */


/*
 * mark_and_reverse()
 *
 * For each of the off heap pointers (process queues, IRT, etc),
 * mark the data structures that they point too (recursively decending
 * tuples), and reverse the offheap pointers that point to onheap
 * structures.
 *
 * This procedure combines the mark stage with the Morris gc
 * algorithm's initial offheap pointer reversal stage.
 *
 * If multiple off-heap pointers point to a single data structure
 * on the heap, then this routine will chain the references.
 * (i.e., The on-heap location will point to an off-heap location
 * which will point to another off-heap location, etc, until the
 * actual data is reached in an off-heap location.)
 *
 * As we mark data structures, keep track of the number of cells
 * in use on the heap by bumping the global 'cells_in_use' counter.
 */
static void mark_and_reverse()
{
#ifdef PDB
    proc_record_t *pr;
#endif /* PDB */    
    int_t i;
    cells_in_use = 0;

    m_and_r_process_queue(_p_active_qf, _p_active_qb);
    m_and_r_process_queue(_p_globsusp_qf, _p_globsusp_qb);
#ifdef PDB    
    m_and_r_process_queue(_pdb_pending_qf, _pdb_pending_qb);
#endif /* PDB */
    
#ifdef PARALLEL
    if (_p_multiprocessing)
    {
	m_and_r_irt();
    }
#endif /* PARALLEL */

    if (_p_current_proc != (proc_record_t *) NULL)
    {
	m_and_r_current_proc();
    }
    else
    {
	/*
	 * Zero out the registers, so that we don't have any stray
	 * pointers in the them.
	 */
	int_t i;
	for (i = 0; i < NUM_A_REGS; i++)
	    _p_a_reg[i] = (cell_t) 0;
    }
    
    /*
     * Mark and reverse the gc reference stack.
     * In order to support the ability
     * to garbage collect at any time, some procedures will
     * need to have local variables garbage collected.
     * The gc refererence stack contains a list of pointers
     * to variables that need to be updated during a gc.
     * The PushGCReference(&v) and PopGCReference() should be
     * used to maintain the stack.
     */
    for (i = 0; i < _p_gc_ref_stack_top; i++)
	m_and_r_pointer(_p_gc_ref_stack[i]);

#ifdef PDB
    /*
     * Print out orphaned process messages now while the heap is
     * still relatively sane.
     */
    for (pr = _pdb_all_qf; pr != (proc_record_t *) NULL; pr = pr->pdb_next)
    {
	if (pr->header.mark == 0)
	    _pdb_orphaned_proc_record(pr);
	else
	    pr->header.mark = 0;
    }
#endif /* PDB */    
} /* mark_and_reverse() */


/*
 * m_and_r_proc_record()
 *
 * Mark and reverse the pointers originating from the arguments of
 * this process record.
 */
static void m_and_r_proc_record(proc_record)
proc_record_t *proc_record;
{
    cell_t **arg = &(proc_record->args[0]);
    cell_t **last_arg = arg + proc_record->proc->arity;
    while (arg < last_arg)
    {
	m_and_r_pointer(arg++);
    }
#ifdef PDB
    proc_record->header.mark = 1;
#endif /* PDB */    
} /* m_and_r_proc_record() */


/*
 * m_and_r_process_queue()
 *
 * Mark and reverse the pointers originating from the arguments of
 * the process records in this process queue.
 */
static void m_and_r_process_queue(QF, QB)
proc_record_t *QF, *QB;
{
    proc_record_t *proc_record = QF;

    while (proc_record != (proc_record_t *) NULL)
    {
	m_and_r_proc_record(proc_record);
	proc_record = proc_record->next;
    }
} /* m_and_r_process_queue() */


/*
 * m_and_r_suspension_queue()
 *
 * Mark and reverse the pointers originating from the arguments of
 * the process records in this suspension queue (queue hung off
 * an undef or rref).
 *
 * Suspension queues are circular queues, where the last proc_record
 * in the queue points back to the first.  (This is done so that
 * in a uniprocessor run, a suspension queue can be appended to
 * the active queue with just a couple pointer manipulations -- no
 * need to traverse the whole queue.)
 */
static void m_and_r_suspension_queue(queue_head)
proc_record_t *queue_head;
{
    proc_record_t *proc_record = queue_head;
    do
    {
	/* We do not need to do anything with value notes */
	if (IsProcRecord(proc_record))
	    m_and_r_proc_record(proc_record);
	proc_record = proc_record->next;
    } while (proc_record != queue_head);
} /* m_and_r_suspension_queue() */


/*
 * m_and_r_irt()
 *
 * Mark and reverse the pointers originating from the IRT.
 *
 * At the same time, reconstruct the IRT free list so that
 * the entries are ascending by index, to improve locality.
 */
static void m_and_r_irt()
{
    irt_t *irt_entry;
    int_t i, last_free;

    last_free = -1;
    for (i = _p_irt_size - 1, irt_entry = _p_irt + i;
	 i >= 0;
	 i--, irt_entry--)
    {
	if (irt_entry->weight == 0)
	{
	    /* Free entry */
	    irt_entry->u.next_free = last_free;
	    last_free = i;
	}
	else
	{
	    /* Used entry */
	    m_and_r_pointer(&(irt_entry->u.ptr));
	}
    }
    _p_irt_free_list = last_free;
    
} /* m_and_r_irt() */


/*
 * m_and_r_current_proc()
 *
 * Mark and reverse the pointers originating from the currently
 * scheduled process.  This includes:
 *	Argument registers (_p_a_reg[])
 *	_p_structure_start_ptr
 */
static void m_and_r_current_proc()
{
    int_t i;

    /*
     * Mark and reverse the argument registers.
     *
     * Normally, _p_first_unused_register is 256, which means we'll
     * gc all of the registers.  But in some instances
     * (i.e., save_arguments()) we know exactly how many registers
     * need to be gc'd.  By temporarily overriding this value in those
     * spots, we can control the gc better.
     *
     * Zero out any unused registers, so that we don't have any stray
     * pointers in the them.
     */
    for (i = 0; i < _p_first_unused_register; i++)
    {
	if (_p_a_reg[i] != (cell_t) 0)
	    m_and_r_pointer(&(_p_a_reg[i]));
    }
    for (i = _p_first_unused_register; i < NUM_A_REGS; i++)
	_p_a_reg[i] = (cell_t) 0;
    
    /*
     * Mark and reverse the _p_structure_start_ptr, if it points to
     * something on the heap.  Also, compute the offset of the
     * _p_structure_ptr from it.
     */
    if (OnHeap(_p_structure_start_ptr))
    {
	structure_ptr_offset = _p_structure_ptr - _p_structure_start_ptr;
	m_and_r_pointer(&_p_structure_start_ptr);
    }

    /*
     * Mark and reverse the _p_suspension_var if it points to something
     */
    if (   _p_suspension_var != (cell_t *) NULL
	&& _p_suspension_var != (cell_t *) -1 )
    {
	m_and_r_pointer(&_p_suspension_var);
    }
} /* m_and_r_current_proc() */


/*
 * m_and_r_pointer()
 *
 * 'source' is the address of an offheap pointer that we want to reverse.
 *
 * Dereference it, mark the data structure that it points to if we have
 * not yet done so, and then reverse this pointer.
 *
 * Recursively decend through the pointers of the the data structure
 * to mark them.  However, to avoid deep recursion on long lists,
 * the last tuple argument is dealt with specially by having a loop
 * around the body of this procedure -- so the tails of lists are
 * dealt with iteratively, rather than recursively (basic tail
 * recursion optimization).
 */
static void m_and_r_pointer(source)
cell_t *source;
{
    cell_t *target;
    u_int_t tag, size;

    while (1)
    {
	/*
	 * Dereference the pointer, and store back the dereferenced
	 * pointer value into the source to remove any indirection.
	 * After this dereference, we are guaranteed to be pointing
	 * at either a data header cell, or at a reversed pointer.
	 *
	 * Note that this exploits the fact the pointer
	 * dereferencing stops when a reversed pointer is
	 * encountered.  This is exactly the behavior we
	 * want.  For example, suppose two offheap pointers
	 * point to the same onheap data structure.  When dereferencing
	 * the first one, the dereference will stop at the data
	 * structure on the heap as normal. Then the pointer reversal
	 * will cause the first cell of the onheap data
	 * structure to be set to a reversed pointer.  So when we go to
	 * defererence the second offheap pointer, which now points to the
	 * reversed pointer, we want the dereference to stop at the
	 * reversed pointer, which it does.
	 */
	Dereference((cell_t *), *((cell_t **) source), target);
	if (*((cell_t **) source) != target)
	    *((cell_t **) source) = target;
	
	if (OffHeap(source) && OnHeap(target))
	{
	    /*
	     * The source is offheap, and the target is onheap.
	     * So we need to:
	     *   Reverse the pointer
	     *   If the target is not yet marked or reversed, then mark it,
	     *       reverse it, and decend through its pointers
	     */
	    if (!IsReversed(target) && !IsMarked(target))
	    {
		/*
		 * The target is not a reversed pointer, and is not yet
		 * marked, so it is a data
		 * structure that we have not yet visited, so:
		 *   Mark the data structure
		 *   Update the count of cells_in_use
		 *   Recurse through the data structure's pointers
		 */

		((data_header_t *) target)->mark = 1;
		tag = ((data_header_t *) target)->tag;
		size = ((data_header_t *) target)->size;
		cells_in_use += _p_size_with_trailer(tag, size);

#ifdef DEBUG
		if (GCDebug(8))
		{
		    fprintf(_p_stdout,
			    "GC: m_and_r_pointer(): m_and_r 0x%lx (cells_in_use=%lu): ",
			    (unsigned long) target,
			    (unsigned long) cells_in_use);
		    _p_print_term(_p_stdout, target);
		    fprintf(_p_stdout, "\n");
		    fflush(_p_stdout);
		}
#endif /* DEBUG */		
		
		if (IsTuple(target))
		{
		    if (size > 0)
		    {
			/*
			 * This is a non-empty tuple, so mark its subterms
			 *
			 * We must first reverse this pointer, so as to
			 * avoid infinite recursive descent.
			 */
			cell_t *t = target + 1;
			ReversePointer(source, target);
			while(size > 1)
			{
			    /*
			     * If *t is a NULL pointer, then
			     * the gc occurred in the middle of filling a
			     * tuple (between build_static and last put_value),
			     * so there are NULL pointer arguments still.
			     */
			    if (*((cell_t **) t) != (cell_t *) NULL)
				m_and_r_pointer(t, size);
			    t++;
			    size--;
			}
			if (*((cell_t **) t) != (cell_t *) NULL)
			{
			    source = t;	/* tail recursion optimization */
			    continue;
			}
		    }
		    else
		    {
			/*
			 * This is an empty tuple, so just reverse it.
			 */
			ReversePointer(source, target);
		    }
		}
		else if (IsUnknown(target) && SuspensionsAt(target))
		{
		    /*
		     * This is an unknown with suspensions, so recursively
		     * mark and reverse the suspended processes.
		     *
		     * We must first reverse this pointer, so as to
		     * avoid infinite recursive descent.
		     */
		    proc_record_t *suspension_queue = SuspendedProcs(target);
		    ReversePointer(source, target);
		    m_and_r_suspension_queue(suspension_queue);
		}
		else
		{
		    /*
		     * This is a data structure that does not contain pointers.
		     */
		    ReversePointer(source, target);
		}
	    }
	    else
	    {
		/*
		 * Already visited this data structure, so just
		 * reverse the this pointer.
		 */
		ReversePointer(source, target);
	    }
	}
	else
	{
	    /*
	     * Either the source is onheap, or the target is offheap.
	     * In either case, we just need to mark the data structure,
	     * if it has not already been done.
	     */
	    if (!IsReversed(target) && !IsMarked(target))
	    {
		/*
		 * The target is an unmarked data structure, so;
		 *   Mark the data structure
		 *   Update the count of cells_in_use
		 *   Recurse through the data structure's pointers
		 *
		 * Note: !IsReversed(target) imples that target is a data
		 *       structure, since we know that target must be either
		 *       a reversed pointer or a real data structure.
		 */

		if (OnHeap(target))
		{
		    ((data_header_t *) target)->mark = 1;
		    tag = ((data_header_t *) target)->tag;
		    size = ((data_header_t *) target)->size;
		    cells_in_use += _p_size_with_trailer(tag, size);
#ifdef DEBUG
		    if (GCDebug(8))
		    {
			fprintf(_p_stdout,
				"GC: m_and_r_pointer(): mark    0x%lx (cells_in_use=%lu): ",
				(unsigned long) target,
				(unsigned long) cells_in_use);
			_p_print_term(_p_stdout, target);
			fprintf(_p_stdout, "\n");
			fflush(_p_stdout);
		    }
#endif /* DEBUG */		
		}

		if (IsTuple(target))
		{
		    if ((size = ((data_header_t *) target)->size) > 0)
		    {
			/*
			 * This is a non-empty tuple, so mark its subterms
			 */
			cell_t *t = target + 1;
			while(size > 1)
			{
			    /*
			     * If *t is a NULL pointer, then
			     * the gc occurred in the middle of filling a
			     * tuple (between build_static and last put_value),
			     * so there are NULL pointer arguments still.
			     */
			    if (*((cell_t **) t) != (cell_t *) NULL)
				m_and_r_pointer(t, size);
			    t++;
			    size--;
			}
			if (*((cell_t **) t) != (cell_t *) NULL)
			{
			    source = t;	/* tail recursion optimization */
			    continue;
			}
		    }
		}
		else if (IsUnknown(target) && SuspensionsAt(target))
		{
		    /*
		     * This is an unknown with suspensions, so recursively
		     * mark and reverse the suspended processes.
		     */
		    proc_record_t *suspension_queue = SuspendedProcs(target);
		    m_and_r_suspension_queue(suspension_queue);
		}
		/*
		 * else:  this is a data structure that does
		 *        not contain pointers.
		 */
	    }
	}
	break;
    }
} /* m_and_r_pointer() */


/*
 * backward_scan();
 *
 * Do the first (backward) scan in the Morris gc.
 *
 * Take all reversed pointers (from the previous offheap pointer reversal,
 * and from reversed downward pointers from a previous step of this scan)
 * and reverse them back.  But in the process update the original
 * pointers so that it points to where this data will 
 * eventually be put.
 *
 * So this has the effect of updating all off-heap
 * starting pointers and all downward pointers
 * so that they point to where the data will end up.
 */
static void backward_scan()
{
    cell_t *cp, *cp_target;
    cell_t *new_loc;
    u_int_t tag;
    u_int_t size, i;
    cell_t *arg, *arg_target;
    proc_record_t *pr1, *pr2, *pr_first;
#ifdef PARALLEL
    cell_t **cancel_list_entry;
#endif    

    /*
     * new_loc is used to point to where data structure will
     * end up after the gc.
     */
    new_loc = _p_heap_bottom + cells_in_use - 1;

    cp = heap_last_used;
    while (cp >= _p_heap_bottom)
    {
	/*
	 * The only place we should encounter pointers on the
	 * heap is within tuples, undefs, and rrefs, and in previous
	 * undefs or rrefs that have been defined a value and 
	 * thus just contain a pointer to the real data.  However,
	 * the mark and reverse stage will have dereferenced any
	 * pointers, so the overwritten undefs and rrefs can be ignored.
	 *
	 * So just skip past 0's and references on the heap.
	 */
	if (*cp == 0 || IsRef(cp))
	{
	    cp--;
	    continue;
	}

	switch (((data_header_t *) cp)->tag)
	{
	case TRAILER_TAG:
	    /*
	     * We've run into a generic trailer tag.
	     * So we need to jump to the head of this data structure,
	     * using the size stored in the trailer cell to get us there.
	     * Also, if this data structure is marked (a reversed
	     * pointer in the header cell also implies marked),
	     * then we also need to decrement new_loc by the appropriate
	     * amount so that it points to the *beginning* of where
	     * this data structure will appear on the heap after
	     * the gc (instead of to the end, like it currently does).
	     */
	    size = ((data_header_t *) cp)->size;
	    cp -= size;
	    if (IsReversed(cp) || IsMarked(cp))
		new_loc -= size;
	    break;
	    
	case UNDEF_TRAILER_TAG:
	    /*
	     * We're at the end of an undef.  So get to the beginning,
	     * just like in the generic trailer tag case above.
	     */
	    size = UndefSizeWithoutTrailer();
	    cp -= size;
	    if (IsReversed(cp) || IsMarked(cp))
		new_loc -= size;
	    break;
	    
	case RREF_TRAILER_TAG:
	    /*
	     * We're at the end of a rref.  So get to the beginning,
	     * just like in the generic trailer tag case above.
	     */
	    size = RrefSizeWithoutTrailer();
	    cp -= size;
	    if (IsReversed(cp) || IsMarked(cp))
		new_loc -= size;
	    break;
	}
	
	/*
	 * Since we've already skipped 0 fill and pointers (previous
	 * undefs and rrefs that have defined), and compensated for
	 * multiple cell data structures via the trailer cell, 
	 * the only things we can encounter here are:
	 *	reversed pointers
	 *	data structures (marked or unmarked)
	 *
	 * We should never encounter a normal pointer at this
	 * level in the loop.
	 */
	
#ifdef DEBUG
	if (IsRef(cp) || cp < _p_heap_bottom || cp > heap_last_used )
	{
	    _p_fatal_error("backward_scan(): Corrupt heap encountered during garbage collection! (Location 1)");
	}
#endif	    

	while (IsReversed(cp))
	{
	    /*
	     * This loop un-reverses the pointer.  If the
	     * reversed pointers are chained (reversed pointer to
	     * reversed pointer ... to real data), then this
	     * loop will update all of them so that they all
	     * point to where this data will eventually be put.
	     *
	     * This heap location (cp) is left with its
	     * original data.
	     */
	    cp_target = ClearReversed(*((cell_t **) cp));
	    *((cell_t **) cp) = *((cell_t **) cp_target);
	    *((cell_t **) cp_target) = new_loc;
	}

	/*
	 * Now that we've unreversed the header cell (if it was reversed),
	 * we must be pointing at the header cell of a data structure.
	 */
	tag = ((data_header_t *) cp)->tag;
	if (IsMarked(cp))
	{
#ifdef DEBUG
	    if (GCDebug(8))
	    {
		fprintf(_p_stdout,
			"GC: backward_scan(): marked  0x%lx (new_loc=0x%lx): ",
			(unsigned long) cp,
			(unsigned long) new_loc);
		switch (tag)
		{
		case TUPLE_TAG:
		    fprintf(_p_stdout, "{...}/%lu", 
			    (unsigned long) ((data_header_t *)cp)->size);
		    break;
		default:
		    _p_print_term(_p_stdout, cp);
		    break;
		}
		fprintf(_p_stdout, "\n");
		fflush(_p_stdout);
	    }
#endif /* DEBUG */		

	    switch (tag)
	    {
	    case TUPLE_TAG:
		/*
		 * Go through the tuple arguments and reverse
		 * any downward pointers (within the heap).
		 *
		 * If multiple downward pointers point to the same place,
		 * this code will end up chaining them so that
		 * there is a chain of upward reversed pointers
		 * with the last thing in the chain (and at the
		 * highest address) containing the actual data.
		 *
		 * We do not need to worry about pointers to ourself.
		 * We are also guaranteed that the arguments will already
		 * be dereferenced from the initial mark_and_reverse stage.
		 */
		arg = cp + 1;
		size = ((data_header_t *) cp)->size;
		for (i = 0; i < size; i++, arg++)
		{
		    arg_target = *((cell_t **) arg);
		    if (arg_target == (cell_t *) NULL)
		    {
			/*
			 * The gc occurred in the middle of filling a
			 * tuple (between build_static and last put_value),
			 * so there are NULL pointer arguments still.
			 */
			continue;
		    }
#ifdef DEBUG
		    /* This better be a pointer */
		    if (!IsRef(arg))
		    {
			_p_fatal_error("backward_scan(): Corrupt heap encountered during garbage collection! (Location 2)");
		    }
#endif		    
		    if (   (_p_heap_bottom <= arg_target)
			&& (arg_target < arg) )
		    {
			ReversePointer(arg, arg_target);
		    }
		}
		break;

	    case UNDEF_TAG:
	    case INT_TAG:
	    case STRING_TAG:
	    case DOUBLE_TAG:
		/* Do nothing */
		break;

	    case RREF_TAG:
#ifndef PARALLEL	
		_p_fatal_error("Illegal remote reference tag found during garbage collection backward scan");
#endif		
		break;

	    default:
		_p_fatal_error("Illegal tag found during garbage collection backward scan");
		break;
	    }
	    
	    new_loc--;
	}
	else
	{
	    if ( tag == UNDEF_TAG
#ifdef PARALLEL
		|| tag == RREF_TAG
#endif
		)
	    {
		if ( SuspensionsAt(cp) )
		{
		    /*
		     * We have found an unmarked undef or rref that has
		     * suspensions hanging off of it.  Therefore, these
		     * are orphaned processes.  So deal with them as such.
		     */
		    pr1 = pr_first = SuspendedProcs(cp);
		    do
		    {
			pr2 = pr1;
			pr1 = pr1->next;
			if (pr2->header.tag == PROC_RECORD_TAG)
			{
#ifdef PDB
			    /*
			     * We've encountered an orphaned process record.
			     * We printed out orphaned process messages at
			     * the end of mark_and_reverse() when the heap
			     * was still relatively sane.  So just dequeue
			     * it here.
			     */
			    _pdb_dequeue_process(pr2);
#endif /* PDB */
			    _p_free_proc_record(pr2);
			}
#ifdef PARALLEL			
			else if (pr2->header.tag == VALUE_NOTE_TAG)
			{
			    /*
			     * We've encountered an orphaned value note.
			     * Therefore, we need to:
			     *   Generate an error message, if appropriate.
			     *   Put it on the appropriate cancel_list so
			     *     that a MSG_CANCEL message will get
			     *     generated for it.
			     * The value note will get freed from within
			     * _p_send_cancels().
			     */
#ifdef PDB			
			    _pdb_orphaned_value_note((value_note_t *) pr2);
#endif /* PDB */
			    cancel_list_entry = _p_cancel_lists
				+ ((value_note_t *)pr2)->node;
			    ((value_note_t *)pr2)->next
				= (value_note_t *) *cancel_list_entry;
			    *cancel_list_entry = (cell_t *) pr2;
			    _p_cancels++;
			}
#endif /* PARALLEL */			
			else
			{
			    _p_fatal_error("backward_scan(): Illegal tag found in orphaned process suspension queue");
			}
		    } while (pr1 != pr_first);
		}

#ifdef PARALLEL	
		if (IsUndef(cp))
		{
#endif		    
		    /*
		     * Zero out the memory for this undef, so that we skip it
		     * in the forward scan.
		     */
		    *cp = 0;
		    *(cp + 1) = 0;
#ifdef PARALLEL	
		}
		else if (IsRref(cp))
		{
		    /*
		     * This is an unmarked remote reference (RREF_TAG).
		     * So add it to the appropriate free list so
		     * that we can cancel it between scans.
		     *
		     * The heap space for this rref will get zero'd out
		     * within _p_send_cancels().
		     */
		    cancel_list_entry = _p_cancel_lists + ((rref_t *)cp)->node;
		    ((rref_t *)cp)->node = (u_int_t) *cancel_list_entry;
		    *cancel_list_entry = cp;
		    _p_cancels++;
		}
#endif
	    }
	}
	
	cp--;
    }

#ifdef DEBUG
    if (new_loc != _p_heap_bottom - 1)
    {
	char buf[256];
	sprintf(buf,
		"backward_scan(): new_loc (0x%lx) != _p_heap_bottom - 1 (0x%lx) at end of backward scan in gc",
		(unsigned long) new_loc,
		(unsigned long) (_p_heap_bottom - 1) );
	_p_fatal_error(buf);
    }
#endif /* DEBUG */    
} /* backward_scan() */


/*
 * forward_scan();
 *
 * Do the second (forward) scan in the Morris gc.
 *
 * When this routine is called, the following are true:
 *   1) All off-heap pointers to data now point to the new
 *	location (i.e. where that data will end up after the gc)
 *   2) All downward pointers to data point now point to the new location.
 *   3) All upward pointers to data are untouched.
 *   4) There are not reversed pointers anywhere (on or off the heap).
 *
 * This scan takes all reversed pointers (reversed upward pointers
 * from a previous step of this scan) and reverses them back.
 * But in the process it updates the original
 * pointers so that they points to where this data will
 * eventually be put.
 *
 * So this has the effect of updating all upward pointers
 * so that they point to where the data will end up.
 */
static void forward_scan()
{
    cell_t *cp, *cp_target;
    cell_t *new_loc;
    u_int_t tag;
    u_int_t size, i, size_in_cells;
    cell_t *arg, *arg_target;

    /*
     * new_loc is used to point to where data structure will
     * end up after the gc.
     */
    new_loc = _p_heap_bottom;

    cp = _p_heap_bottom;
    while (cp <= heap_last_used)
    {
	/*
	 * The only place we should encounter pointers on the
	 * heap is within tuples, undefs, and rrefs, and in previous
	 * undefs or rrefs that have been defined a value and 
	 * thus just contain a pointer to the real data.  However,
	 * the mark and reverse stage will have dereferenced any
	 * pointers, so the overwritten undefs and rrefs can be ignored.
	 *
	 * So just skip past 0's and references on the heap.
	 */
	if (*cp == 0 || IsRef(cp))
	{
	    cp++;
	    continue;
	}
	
	/*
	 * Since we've already skipped 0 fill and pointers (previous
	 * undefs and rrefs that have defined),
	 * the only things we can encounter here are:
	 *	reversed pointers
	 *	data structures (marked or unmarked)
	 *
	 * We should never encounter a normal pointer at this
	 * level in the loop.
	 */

#ifdef DEBUG
	if (IsRef(cp) || cp < _p_heap_bottom || cp > heap_last_used )
	{
	    _p_fatal_error("forward_scan(): Corrupt heap encountered during garbage collection! (Location 3)");
	}
#endif	    

	while (IsReversed(cp))
	{
	    /*
	     * This loop un-reverses the pointer.  If the
	     * reversed pointers are chained (reversed pointer to
	     * reversed pointer ... to real data), then this
	     * loop will update all of them so that they all
	     * point to where this data will eventually be put.
	     *
	     * This heap location (cp) is left with its
	     * original data.
	     */
	    cp_target = ClearReversed(*((cell_t **) cp));
	    *((cell_t **) cp) = *((cell_t **) cp_target);
	    *((cell_t **) cp_target) = new_loc;
	}

#if defined(DEBUG) && defined(PCN_ALIGN_DOUBLES)
	if (!OddWordDoubleAligned(cp) || !OddWordDoubleAligned(new_loc))
	{
	    _p_fatal_error("forward_scan(): Corrupt heap encountered during garbage collection! Data structures are not properly double word aligned. (Location 4)");
	}
#endif	    

	/*
	 * Now that we've unreversed the header cell (if it was reversed),
	 * we must be pointing at the header cell of a data structure.
	 */
	tag = ((data_header_t *) cp)->tag;
	size = ((data_header_t *) cp)->size;
	size_in_cells = _p_size_with_trailer(tag, size);
	
	if (IsMarked(cp))
	{
	    switch (tag)
	    {
	    case TUPLE_TAG:
		/*
		 * Go through the tuple arguments and reverse
		 * any upward pointers (within the heap).
		 *
		 * If multiple upward pointers point to the same place,
		 * this code will end up chaining them so that
		 * there is a chain of downward reversed pointers
		 * with the last thing in the chain (and at the
		 * lowest address) containing the actual data.
		 *
		 * We do not need to worry about pointers to ourself.
		 * We are also guaranteed that the arguments will already
		 * be dereferenced from the initial mark_and_reverse stage.
		 */
		arg = cp + 1;
		for (i = 0; i < size; i++, arg++)
		{
		    arg_target = *((cell_t **) arg);
		    if (arg_target == (cell_t *) NULL)
		    {
			/*
			 * The gc occurred in the middle of filling a
			 * tuple (between build_static and last put_value),
			 * so there are NULL pointer arguments still.
			 */
			continue;
		    }
#ifdef DEBUG
		    /* This better be a pointer */
		    if (!IsRef(arg))
		    {
			_p_fatal_error("forward_scan(): Corrupt heap encountered during garbage collection! (Location 5)");
		    }
#endif		    
		    if (   (arg < arg_target)
			&& (arg_target < _p_heap_hard_top) )
		    {
			/*
			 * Reverse the pointer, but so that the
			 * reversed pointer (*arg_target) points
			 * to where this data structure will end up,
			 * so that when the pointer is reversed back
			 * in a later iteration of this scan, the
			 * tuple argument in the new data structure
			 * will be updated.
			 */
			*((cell_t **) arg) = *((cell_t **) arg_target);
			*((cell_t **) (arg_target)) =
			    SetReversed(new_loc + i + 1);
		    }
		}
		break;

	    case UNDEF_TAG:
	    case INT_TAG:
	    case STRING_TAG:
	    case DOUBLE_TAG:
		/* Do nothing */
		break;

	    case RREF_TAG:
#ifndef PARALLEL		
		_p_fatal_error("Illegal remote reference tag found during garbage collection forward scan");
#endif		
		break;

	    default:
		_p_fatal_error("Illegal tag found during garbage collection forward scan");
		break;
	    }

	    /*
	     * Now copy this data structure to its new location,
	     * clear the mark, and bump new_loc appropriately.
	     */
	    if (new_loc != cp)
	    {
		memmove(new_loc, cp, (size_in_cells * CELL_SIZE));
	    }
	    ((data_header_t *) new_loc)->mark = 0;
	    new_loc += size_in_cells;
	}
	
	cp += size_in_cells;
    }

#ifdef DEBUG
    if (new_loc != _p_heap_bottom + cells_in_use)
    {
	char buf[256];
	sprintf(buf,
		"forward_scan(): new_loc (0x%lx) != _p_heap_bottom + cells_in_use (0x%lx) at end of forward scan in gc",
		(unsigned long) new_loc,
		(unsigned long) (_p_heap_bottom + cells_in_use) );
	_p_fatal_error(buf);
    }
#endif /* DEBUG */    

    /*
     * We've completed the garbage collection, so reset the heap pointer.
     */
    _p_heap_ptr = new_loc;
} /* forward_scan() */
