/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 * 
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 * 
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 * 
 * Carnegie Mellon requests users of this software to return to
 * 
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 * 
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */
/*
 * HISTORY
 * $Log:	gprof_support.c,v $
 * Revision 2.2  93/08/11  14:32:46  mrt
 * 	Moved from UX
 * 	[93/08/06            bershad]
 * 
 * Revision 2.3  93/05/18  12:05:08  rvb
 * 	Lint
 * 
 * Revision 2.2  93/02/16  15:43:41  mrt
 * 	Let there be pc_sampling, too!
 * 	And a bunch of cleanup.
 * 	[93/01/13            rvb]
 * 	created.
 * 	[92/05/03            cmaeda]
 * 
 *
 * File:	gprof_support.c
 * Author:	Chris Maeda
 * Date:	March 1992
 *
 * Defines routines for in-server profiling a la gprof.
 *
 */



#include <mach.h>
#include <mach/pc_sample.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <cthreads.h>
#include <gprof.h>

/*
 * GPROF
 *
 *
 * _mcount protects its data structures with a mutex.  Therefore
 * we have to be very careful to not call any functions defined
 * outside of this file since they can recursively call _mcount.
 * Note that the BSD trick of setting a variable to turn off
 * _mcount in recursive calls doesn't work since we're multithreaded.
 *
 * This module is safe for both user programs and UX.
 * The only functions defined outside of this file are various
 * kernel stubs (eg vm_allocate) from libmach and the timeout
 * function which is used to put the pc sampling function on the
 * callout queue.  However, timeout is never called from _mcount
 * so we're ok.
 
 */
/*
 * UNFORTUNATELY, we profile ALL libraries, so
 * we can no longer use mutexes in the critical call count path.
 * But spin_try_lock is implemented in gcc asm(""),
 * so that's all we can use.
 *
 */


/*
 * Non-zero while the profiling buffers are not allocated: cleared at the
 * end of a successful monstartup() and set again by monshutdown().
 */
static int monstartup_needed = 1;



/*
 * UX should define gprof_ux_server to prevent gprof_support2 from
 * being pulled in.  Otherwise, gprof_ux_server will be pulled in from
 * gprof_support, giving access to the other mon* routines.
 * Set to TRUE if running in UX 
 */

extern int gprof_ux_server;		

/* When non-zero, extra debugging output is printed. */
int gprof_debug;

/*
 * Spin lock taken by mcountaux() around the call-arc tables and by
 * do_gprof_mon_switch() while profiling is being started or stopped.
 */
spin_lock_t gprof_spin = SPIN_LOCK_INITIALIZER;
		/* if you can't get it; drop datum */

/*
 * Holds the active sample flavor while profiling is running (set by
 * gprof_start_profiling) and 0 while it is stopped.
 */
int gprof_do_profiling = 0;	/* do profiling when non-zero */
vm_address_t gprof_bounds = 0;	/* counter of bounds check errors */
/* NOTE(review): not referenced anywhere in this part of the file. */
vm_address_t gprof_text_low, gprof_text_high;

/*
 * Call graph data structures.
 *
 * gprof_calls is indexed by (from - gt.low_pc) >> CG_SHIFT and has
 * gt.pchist_size entries; each entry heads a chain of callarcs linked
 * through ca_next.  Unused callarcs sit on gprof_callarc_freelist.
 * gprof_callarc_pagelist chains the vm_allocate'd regions the callarcs
 * were carved from, so that monshutdown() can give them back.
 */
callarc_t **gprof_calls = (callarc_t **) NULL; /* hash table of callarcs */
callarc_t *gprof_callarc_freelist = (callarc_t *) NULL;
callarc_t *gprof_callarc_pagelist = (callarc_t *) NULL;


/*
 * Mutex held around buffer setup/teardown, starting and stopping
 * profiling, and taking pc samples.
 */
struct mutex gprof_mutex = MUTEX_INITIALIZER;

/*
 * Synchronization mechanisms used from application-level implementations
 *
 * The sampler thread (gprof_sample_loop_user) waits on the condition
 * while profiling is off; gprof_start_profiling signals it.
 */
struct condition gprof_sample_condition = CONDITION_INITIALIZER;
cthread_t gprof_sample_thread;




/*
 * PC histogram data structures.
 */ 



int gprof_tick_sample;			/* ticks per sample */
int gprof_sample_interval = 500;	/* task_get_sampled_pcs every 500ms */
/*
 * Sequence number returned by the latest task_get_sampled_pcs call and
 * the one from the call before it; compared to detect missed samples.
 */
sampled_pc_seqno_t gprof_seqno, gprof_oseqno;
/* NOTE(review): never updated in this part of the file. */
unsigned long gprof_seqnum_gaps = 0;
/* Receive buffer handed to task_get_sampled_pcs. */
sampled_pc_t gprof_pc_samples[512];
/* Forward declarations of the two sampler drivers defined below. */
void *gprof_sample_loop_user();
void gprof_sample_loop_ux();


/*
 * Description of the task being profiled and of its pc histogram.
 * Filled in by gprof_set_target_info(); pchist_data is allocated by
 * monstartup().
 */
struct gprof_target_info {
    int		initialized;		/* TRUE once gprof_set_target_info has run */
    mach_port_t task;			/* task whose pcs are sampled */
    vm_offset_t low_pc;			/* lowest profiled address */
    vm_offset_t high_pc;		/* start of the last histogram bucket */
    vm_offset_t pchist_size;		/* number of histogram buckets */
    CHUNK       *pchist_data;		/* histogram of pc values */    
};

/* The single profiling target known to this module. */
struct gprof_target_info gt;




/*
 * Print the diagnostic S exactly as given (no newline is appended).
 * When not running inside the UX server the condition is treated as
 * fatal and the program exits with status 10; inside UX the routine
 * simply returns to its caller.
 */
gprof_warning(s)
    char *s;
{
    /*
     * Never pass the message itself as the format string: a '%' in
     * the text would be interpreted as a conversion specification.
     */
    printf("%s", s);
    if (!gprof_ux_server)  {
	exit(10);
    }
    return;
}



/*
 * Record which task is profiled and which address range the pc
 * histogram covers.
 *
 * If both low_pc and high_pc are zero the range defaults to this
 * program's own text, from _eprol up to etext.  The range is divided
 * into buckets of CG_RESOL bytes, and the stored high_pc is pulled down
 * to the start of the last bucket.
 *
 * Always returns 0.
 */
int
gprof_set_target_info(task, low_pc, high_pc)
    mach_port_t	task;
    vm_offset_t low_pc;    
    vm_offset_t high_pc;
{
    extern int _eprol(); /* first function in text seg */
    extern int etext; /* comes right after text seg */
    vm_offset_t range_lo, range_hi, nbuckets;

    if (low_pc == 0 && high_pc == 0)  {
	/* no explicit range given: use our own text segment */
	range_lo = (unsigned long)_eprol;
	range_hi = (unsigned long)&etext;
    } else {
	range_lo = low_pc;
	range_hi = high_pc;
    }
    nbuckets = (range_hi - range_lo) / CG_RESOL;

    gt.task = task;
    gt.low_pc = range_lo;
    gt.pchist_size = nbuckets;
    gt.high_pc = range_lo + (nbuckets - 1) * CG_RESOL;
    gt.initialized = TRUE;
    return 0;
}

/*
 * Make sure the target description has been filled in.  If nobody has
 * called gprof_set_target_info yet, default to profiling our own task
 * over its whole text range.  Takes gprof_mutex itself, so the caller
 * must not be holding it.
 */
gt_init()
{
    mutex_lock(&gprof_mutex);
    if (!gt.initialized)
	gprof_set_target_info(mach_task_self(), 0, 0);
    mutex_unlock(&gprof_mutex);
}




/*
 * Memory allocator for callarcs.
 *
 * Allocates room for N callarcs with vm_allocate.  The first callarc of
 * the region is used as a header that links the region onto
 * gprof_callarc_pagelist; the rest are strung together and become the
 * new gprof_callarc_freelist (whatever the freelist held before is
 * overwritten, so it is expected to be empty on entry).
 *
 * On allocation failure a warning is issued and nothing is changed.
 */
void
gprof_callarc_get(n)
	int n;			/* this many callarcs */
{
    kern_return_t kr;
    vm_address_t region;
    vm_size_t nbytes;
    int remaining;
    register callarc_t *hdr, *arc, **linkp;

    nbytes = n * sizeof(callarc_t);
    kr = vm_allocate(mach_task_self(), &region, nbytes, TRUE);
    if (kr != KERN_SUCCESS) {
	gprof_warning("_mcount: allocating callarcs");
	return;
    }

    /*
     * The header callarc records the region size in ca_count and the
     * previously allocated region in ca_next.
     */
    hdr = (callarc_t *)region;
    hdr->ca_count = nbytes;
    hdr->ca_next = gprof_callarc_pagelist;
    gprof_callarc_pagelist = hdr;

    /*
     * Thread every callarc after the header onto the freelist.
     */
    remaining = (nbytes - sizeof(callarc_t)) / sizeof(callarc_t);
    linkp = &gprof_callarc_freelist;
    for (arc = hdr + 1; remaining > 0; remaining--, arc++) {
	*linkp = arc;
	linkp = &arc->ca_next;
    }
    *linkp = (callarc_t *) NULL;	/* terminate the list */
}

/*
 * Allocate memory so we can do sampling.
 */
void
monstartup()
{
    kern_return_t kr;
    unsigned int objtype;
    vm_address_t callarcs;


    mutex_lock(&gprof_mutex);

    if (monstartup_needed)  {
	mutex_unlock(&gprof_mutex);
	gt_init();
	mutex_lock(&gprof_mutex);	
	/*
	 * Allocate pc histogram.
	 */
	kr = vm_allocate(mach_task_self(),
			 (vm_address_t *) &gt.pchist_data,
			 gt.pchist_size * sizeof(CHUNK),
			 TRUE);	/* anywhere */
	if (kr != KERN_SUCCESS)  {
	    gprof_warning("monstartup can't allocate pchist");
	    goto done;
	}

	/*
	 * Allocate callarc table.
	 */
	kr = vm_allocate(mach_task_self(),
			 (vm_address_t *) &gprof_calls,
			 gt.pchist_size * sizeof(callarc_t *),
			 TRUE);	/* anywhere */
	if (kr != KERN_SUCCESS)  {
	    gprof_warning("monstartup can't allocate callarc table");
	    goto done;
	}

	/*
	 * Allocate callarcs.
	 */
	gprof_callarc_get(512);

	if (!gprof_ux_server)  {
	    gprof_sample_thread = cthread_fork(gprof_sample_loop_user, 0);
	}
	monstartup_needed = 0;
    }

 done:
    mutex_unlock(&gprof_mutex);
    return;

}



/*
 * Deallocate memory so we can do sampling again.
 */
void
monshutdown()
{
    kern_return_t kr;
    register callarc_t *arcs, *narcs;

    mutex_lock(&gprof_mutex);
    if (!monstartup_needed)  {
	/*
	 * De_allocate pc histogram.
	 */
	kr = vm_deallocate(mach_task_self(),
			   (vm_address_t) gt.pchist_data,
			   (vm_size_t)(gt.pchist_size
				       * sizeof(CHUNK)));
	if (kr != KERN_SUCCESS)  {
	    gprof_warning("monshutdown can't de_allocate pchist");
	}

	/*
	 * De_allocate callarc table.
	 */
	kr = vm_deallocate(mach_task_self(),
			   (vm_address_t ) gprof_calls,
			   (vm_size_t)(gt.pchist_size * sizeof(callarc_t *)));
	if (kr != KERN_SUCCESS)  {
	    gprof_warning("monshutdown can't de_allocate callarc table");
	}

	/*
	 * De_allocate ALL callarcs.
	 */
	arcs = gprof_callarc_pagelist;
	gprof_callarc_pagelist = 0;
	do {
	    narcs = arcs->ca_next;
	    kr = vm_deallocate(mach_task_self(),
			       (vm_address_t) arcs,
			       (vm_size_t)arcs->ca_count);
	    if (kr != KERN_SUCCESS)  {
		gprof_warning("monshutdown can't de_allocate callarcs");
	    }
	} while (arcs = narcs);
    
	monstartup_needed = 1;
    }
    mutex_unlock(&gprof_mutex);
    return;
}



/*
 * mcountaux -- called by _mcount
 *
 * Count one traversal of the call arc FROM -> TO.
 *
 * The arc tables are protected by gprof_spin only.  If the lock cannot
 * be obtained after a bounded number of retries the datum is dropped.
 * FROM selects a hash chain by (from - gt.low_pc) >> CG_SHIFT; an
 * address outside the histogram range just bumps gprof_bounds.  Within
 * the chain the arc is looked up by TO and, when found, moved to the
 * front before its count is incremented.  Otherwise a new arc is taken
 * from the freelist (which is refilled on demand) and pushed on the
 * front of the chain.
 *
 * Every path that acquired gprof_spin leaves through mcount_done so
 * that the lock is always released.
 */
void
mcountaux(from, to)
	unsigned long from, to;
{
    register callarc_t *carc, *parc;
    register unsigned long idx;

    /*
     * I hope, I hope, I hope ...
     */
    if (!spin_try_lock(&gprof_spin)) {
	register int i;
	for (i = 50; --i > 0;)
	    if (!spin_lock_locked(&gprof_spin) && spin_try_lock(&gprof_spin))
		break;
	if (i == 0)	/* give up */
	    return;
    }
    /*
     * Find the callarc for the given values of FROM and TO.
     */
    /* Bounds Check */
	
    idx = (from - gt.low_pc) >> CG_SHIFT;
    if (idx >= gt.pchist_size) {
	gprof_bounds++;
	goto mcount_done;
    }
    /* parc is only consulted once carc is past the head of the chain */
    parc = (callarc_t *) NULL;
    for (carc = gprof_calls[idx];
         carc != (callarc_t *) NULL;
	 parc = carc, carc = carc->ca_next) {
	if (carc->ca_to == to) {
	    if (carc != gprof_calls[idx]) {
		/* move the arc to the front of its chain */
		parc->ca_next = carc->ca_next;
		carc->ca_next = gprof_calls[idx];
		gprof_calls[idx] = carc;
	    }
	    (carc->ca_count)++;
	    goto mcount_done;
	}
    }

    /*
     * If we didn't find a callarc, grab one.
     */
    if (gprof_callarc_freelist == (callarc_t *)NULL) {
	int oprof = gprof_do_profiling;
	/* profiling is switched off while the allocator runs */
	gprof_do_profiling = 0; 
	gprof_callarc_get(512);
        gprof_do_profiling = oprof; 
	if (gprof_callarc_freelist == (callarc_t *)NULL) {
	    gprof_do_profiling = 0;
	    gprof_warning("_mcount: out of callarcs");
	    /*
	     * Must not return directly: that would leave gprof_spin
	     * held and lock everybody out from then on.
	     */
	    goto mcount_done;
	}
    }

    /* get a callarc from freelist */
    carc = gprof_callarc_freelist;
    gprof_callarc_freelist = carc->ca_next;
    
    /* initialize the new callarc */
    carc->ca_from = from;
    carc->ca_to = to;
    carc->ca_count = 1;
    carc->ca_next = gprof_calls[idx];
    gprof_calls[idx] = carc;

mcount_done:
    spin_unlock(&gprof_spin);
    return;
}

/* Running total of samples fetched by gprof_take_samples. */
int gprof_total_samples = 0;

/*
 * Fetch the pending pc samples for the target task and fold them into
 * the pc histogram.
 *
 * Each sample whose pc lies in [gt.low_pc, gt.high_pc] increments the
 * histogram bucket (pc - gt.low_pc) >> CG_SHIFT; other samples are only
 * counted in gprof_total_samples.  A sample whose type does not match
 * the active flavor is reported but still processed.  A mismatch
 * between the sequence numbers of consecutive calls is reported as a
 * gap.
 *
 * The callers in this file invoke it with gprof_mutex held.
 *
 * Returns the number of samples fetched, or 0 if they could not be
 * fetched.
 */
int
gprof_take_samples()
{
    kern_return_t kr;
    int samplecnt, i;
    vm_offset_t pc;

    /* in: capacity of the buffer; out: number of samples returned */
    samplecnt = sizeof(gprof_pc_samples) / sizeof(sampled_pc_t);

    kr = task_get_sampled_pcs(gt.task, &gprof_seqno,
			      gprof_pc_samples, &samplecnt);

    if (kr != KERN_SUCCESS) {
	mach_error("sampling pcs", kr);
	return 0;
    }

    /* store samples for our task in the pc histogram */
    for (i = 0; i < samplecnt; i++) {
	if ( !(gprof_pc_samples[i].sampletype & gprof_do_profiling))  {
	    printf("gprof -- bad sample 0x%x ", gprof_pc_samples[i].sampletype);
	}
	gprof_total_samples++;

	pc = gprof_pc_samples[i].pc;
	if ((pc >= gt.low_pc) && (pc <= gt.high_pc))  {
	    (gt.pchist_data[(pc-gt.low_pc)>>CG_SHIFT])++;
	    if (gprof_debug)  {
		printf("%d %d 0x%x %d 0x%x\n", gprof_total_samples,
		       i, gprof_pc_samples[i].pc,
		       gprof_pc_samples[i].pc,
		       gprof_pc_samples[i].sampletype);
	    }
	}
    }

    /* see if we missed any samples */
    if ((gprof_oseqno + samplecnt) != gprof_seqno) {
	printf("gprof_take_samples: gap from %d to %d\n",
	       gprof_oseqno, gprof_seqno - samplecnt);
	/* sample more often, but never more often than every 500ms */
	if (gprof_sample_interval > 500) {
	    gprof_sample_interval >>= 1;
	}
    }

    gprof_oseqno = gprof_seqno;

    /* the function is declared int: hand back a defined value */
    return samplecnt;
}




/*
 * Called with gprof mutex held
 */

static kern_return_t
gprof_start_profiling(sample_flavor)
    sampled_pc_flavor_t sample_flavor;
{
    register int i, samplecnt;
    register callarc_t *carc, *carc_next;
    kern_return_t kr;

    if (gprof_do_profiling == 0)  {
	if (monstartup_needed)  {
	    /* drop the lock and reacquire */
	    mutex_unlock(&gprof_mutex);
	    monstartup();
	    mutex_lock(&gprof_mutex);	    
	}

	/*
	 * (a) zero histogram
	 * (b) (re)initialize call graph
	 * (c) (re)start pc sampling
	 */
	
	bzero(gt.pchist_data, gt.pchist_size * sizeof(CHUNK));

	for (i = 0; i < gt.pchist_size; i++) {
	    carc = gprof_calls[i];
	    while (carc != (callarc_t *) NULL) {
		carc_next = carc->ca_next;
		carc->ca_next = gprof_callarc_freelist;
		gprof_callarc_freelist = carc;
		carc = carc_next;
	    }
	    gprof_calls[i] = (callarc_t *) NULL;
	}

	gprof_seqno = gprof_oseqno = 0;

	kr = task_enable_pc_sampling(gt.task, &gprof_tick_sample,
				     sample_flavor);
	if (kr == KERN_SUCCESS)  {
	    gprof_do_profiling = sample_flavor;
	    if (!gprof_ux_server)  {
		/* wakeup the sampler thread */
		condition_signal(&gprof_sample_condition);
	    }
	}
	return kr;
    } else
        return KERN_NAME_EXISTS;
}



/*
 * called with gprof_mutex held
 *
 * Stop profiling if it is running: collect the samples still pending,
 * disable pc sampling on the target task and clear gprof_do_profiling.
 * Returns KERN_SUCCESS when profiling was not running, otherwise the
 * result of task_disable_pc_sampling.
 */
static kern_return_t
gprof_stop_profiling()
{
    int leftover;		/* out parameter of the disable call; unused */
    kern_return_t kr;

    if (!gprof_do_profiling)
	return KERN_SUCCESS;

    gprof_take_samples();
    kr = task_disable_pc_sampling(gt.task, &leftover);
    gprof_do_profiling = 0;
    return kr;
}


/*
 * EXPORTED ROUTINES
 */

/* EXPORTED */
/*
 * Query, start or stop profiling.
 *
 * On entry *sample_flavor is -1 to query, 0 to stop, or the sample
 * flavor to start with.  On return it holds the flavor that was active
 * before the call (0 if profiling was off).  A query changes nothing
 * and always succeeds; otherwise the result of starting or stopping is
 * returned.
 *
 * Under UX the periodic sampler is also put on or taken off the callout
 * queue to match the new state.
 */
kern_return_t
do_gprof_mon_switch(proc_port,  sample_flavor)
	mach_port_t	proc_port;
	sampled_pc_flavor_t	*sample_flavor;
{
	int old_value, new_value;
	kern_return_t kr;

	if (gprof_debug)
		printf("gprof_mon_switch called\n");
	
	if (*sample_flavor == -1)  {
	     *sample_flavor = gprof_do_profiling;
	     return KERN_SUCCESS;
	}


	mutex_lock(&gprof_mutex);
	spin_lock(&gprof_spin);

	old_value = gprof_do_profiling;

	if (*sample_flavor)
	  kr = gprof_start_profiling(*sample_flavor);
	else
	  kr = gprof_stop_profiling();

	/* capture the resulting state while it is still protected */
	new_value = gprof_do_profiling;

	*sample_flavor = old_value;
	spin_unlock(&gprof_spin);
	mutex_unlock(&gprof_mutex);

	if (gprof_ux_server)  {
	    /*
	     * Put gprof_sample_loop_ux on, or take it off, the callout
	     * queue.  We have to do it here, outside the locks, since
	     * it will call _mcount.
	     */
	    if (new_value) {
		/*
		 * Only queue the sampler when profiling was actually
		 * switched on by this call.  A redundant start request
		 * would otherwise add a second, self-rearming entry.
		 */
		if (!old_value)
		    timeout(gprof_sample_loop_ux, 0, gprof_sample_interval);
	    }
	    else {
		untimeout(gprof_sample_loop_ux,0);
	    }
	}

	return kr;
}


/* EXPORTED */
/*
 * Build a snapshot of the profiling data in a freshly vm_allocate'd
 * buffer and hand it back through *mon_data / *mon_data_cnt.
 *
 * Buffer layout:
 *	struct gprof_header	(low pc, high pc, size of header + histogram)
 *	CHUNK[gt.pchist_size]	(copy of the pc histogram)
 *	struct gprof_call[]	(one record per callarc in the hash table)
 *
 * gprof_do_profiling is cleared for the duration of the dump and
 * restored afterwards.  Returns KERN_SUCCESS, or the vm_allocate error
 * if the buffer could not be obtained.
 *
 * NOTE(review): the buffer is not freed here; presumably the caller
 * (or the RPC layer) disposes of it -- confirm.
 */
kern_return_t
do_gprof_mon_dump(proc_port, mon_data, mon_data_cnt)
	mach_port_t	proc_port;
	char		**mon_data;
	int		*mon_data_cnt;
{
	vm_address_t	mon_buffer;
	vm_size_t	mon_size;
	int		callarc_cnt, copied, i;
	callarc_t	*ca;
	kern_return_t	kr;
	struct gprof_header *gh;
	CHUNK		*hist;
	int		old_gprof_do_profiling = gprof_do_profiling;

	gprof_do_profiling = 0;

	/* Count callarcs. */
	callarc_cnt = 0;
	for (i = 0; i < gt.pchist_size; i++)
		for (ca = gprof_calls[i];
		     ca != (callarc_t *) NULL;
		     ca = ca->ca_next)
			callarc_cnt++;

	/* How big is the buffer? */
	mon_size = sizeof(struct gprof_header)
    		+ gt.pchist_size * sizeof(CHUNK)
			+ callarc_cnt * sizeof(struct gprof_call);
  
	kr = vm_allocate(mach_task_self(), &mon_buffer, mon_size, TRUE);
	if (kr != KERN_SUCCESS) {
		gprof_do_profiling = old_gprof_do_profiling;
		return kr;
	}

	gh = (struct gprof_header *) mon_buffer;
	gh->low = gt.low_pc;
	gh->high = gt.high_pc;
	gh->nbytes = sizeof(struct gprof_header) +
			gt.pchist_size * sizeof(CHUNK);

	hist = (CHUNK *) (gh + 1);
	bcopy((char *)gt.pchist_data, (char *)hist,
	      gt.pchist_size * sizeof(CHUNK));

 	hist += gt.pchist_size;
	{
		register struct gprof_call *gc;

		gc = (struct gprof_call *) hist;

		/*
		 * The arc tables are not locked here, so an arc may have
		 * been added since the counting pass.  Never copy more
		 * records than the buffer was sized for.
		 */
		copied = 0;
		for (i = 0; i < gt.pchist_size && copied < callarc_cnt; i++)
			for (ca = gprof_calls[i];
			     ca != (callarc_t *) NULL && copied < callarc_cnt;
			     ca = ca->ca_next) {
				/*
				 * We have to use bcopy since gc might
				 * not be word-aligned.
				 */
				bcopy(ca, gc, sizeof(struct gprof_call));
				gc++;
				copied++;
			}
	}

	*mon_data = (char *) mon_buffer;
	*mon_data_cnt = mon_size;

	gprof_do_profiling = old_gprof_do_profiling;
	return KERN_SUCCESS;
}


/*
 * Periodic sampler for UX, driven by the callout queue.  While
 * profiling is on, each invocation collects the pending samples and
 * re-queues itself gprof_sample_interval later; once profiling has been
 * switched off it returns without re-queueing.
 */
void    
gprof_sample_loop_ux()
{
    int running;

    mutex_lock(&gprof_mutex);
    running = (gprof_do_profiling != 0);
    if (running)
	gprof_take_samples();
    mutex_unlock(&gprof_mutex);

    /* re-arm only after the mutex has been dropped */
    if (running)
	timeout(gprof_sample_loop_ux, 0, gprof_sample_interval);
}






/**************************
 * Timer facilities
 *
 * Only used for profiling programs, but not UX.
 *
 **************************/


/*
 * Receive right allocated by gprof_timer_init (0 until then), and the
 * message header handed to mach_msg by gprof_timer_pause_ms.
 */
static mach_port_t gprof_timeoutport = 0;
static mach_msg_header_t gprof_msgh;	    

/*
 * Issue a mach_msg receive with a timeout of MS on the private timer
 * port and return its result.  The port is created on first use.
 * Prints a warning when used inside UX.
 */
gprof_timer_pause_ms(ms)
{
    int status;

    if (gprof_ux_server)
	printf("Warning: gprof_support timer used in UX\n");

    if (!gprof_timeoutport)
	gprof_timer_init();

    status = mach_msg(&gprof_msgh,
		      MACH_RCV_MSG|MACH_RCV_TIMEOUT,	/* option */
		      0,				/* send size */
		      0,				/* receive size */
		      gprof_timeoutport,		/* port */
		      ms,				/* timeout */
		      MACH_PORT_NULL);
    return status;
}


/*
 * Allocate the receive right used by gprof_timer_pause_ms and clear the
 * port fields of its message header.  A failed allocation is reported
 * with mach_error.  Returns the result of mach_port_allocate.
 * Prints a warning when called inside UX.
 */
kern_return_t
gprof_timer_init()
{
    kern_return_t status;

    if (gprof_ux_server)
	printf("Warning: gprof_support timer init in UX\n");

    status = mach_port_allocate(mach_task_self(),
				MACH_PORT_RIGHT_RECEIVE,
				&gprof_timeoutport);
    if (status != KERN_SUCCESS)
	mach_error("mach_port_allocate", status);

    gprof_msgh.msgh_remote_port = MACH_PORT_NULL;
    gprof_msgh.msgh_local_port = MACH_PORT_NULL;
    return status;
}





/*
 * Body of the sampler thread forked by monstartup outside of UX.
 * Forever: pause for gprof_sample_interval, wait on
 * gprof_sample_condition until profiling is switched on, then collect
 * the pending samples.  Never returns; the argument is unused.
 */
void*
gprof_sample_loop_user(any)
    void* any;
{
    while (1) {
	gprof_timer_pause_ms(gprof_sample_interval);

	mutex_lock(&gprof_mutex);
	/* sleep until somebody turns profiling on */
	while (gprof_do_profiling == 0)
	    condition_wait(&gprof_sample_condition, &gprof_mutex);
	gprof_take_samples();
	mutex_unlock(&gprof_mutex);
    }
}








    
