/*
* Copyright (c) 1992 Carnegie Mellon University 
*                    SCAL project: Guy Blelloch, Siddhartha Chatterjee,
*                                  Jonathan Hardwick, Jay Sipelstein,
*                                  Marco Zagha
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* The SCAL project requests users of this software to return to 
*
*  Guy Blelloch				guy.blelloch@cs.cmu.edu
*  School of Computer Science
*  Carnegie Mellon University
*  5000 Forbes Ave.
*  Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/

/* This file contains some library functions */
#include "defins.h"
#include <cvl.h>
#include <string.h>

/* -------------------------SPLIT-----------------------------*/

/* Split uses a cvl_bool vector to partition the elements of a vector.
 * Elements corresponding to False move to the bottom, to True move to the 
 * top.  Split returns the number of False items in the vector.
 *	d = destination vector
 *	s = source vector
 *	f = flag vector (cvl_bool)
 *	len = length of all vectors
 */

#define make_split(_name, _type)				\
	int _name (d, s, f, len, scratch)			\
	vec_p d, s, f, scratch;					\
	int len;						\
{								\
	cvl_bool *nf = (cvl_bool *)f;				\
	_type *bot = (_type *) d;				\
	_type *top;						\
	_type *src = (_type *) s;				\
	int count = 0;						\
								\
	unroll(len, count += *nf++;)				\
	count = len - count;					\
	top = (_type *)d + count;				\
								\
	nf = (cvl_bool *) f;					\
	unroll(len,						\
		if (*nf++)					\
			*top++ = *src++;			\
		else						\
			*bot++ = *src++;			\
	)							\
	return count;						\
}								\
	make_no_scratch(_name)					\
	make_inplace(_name,INPLACE_TRUE)

make_split(spl_luz, int)
make_split(spl_lub, cvl_bool)
make_split(spl_lud, double)

/* Segmented split: performs a split operation on each segment of a
 * segmented vector.  Returns the split vector and a vector giving the 
 * number of False elements in each segment of the flag vector.
 *	d = destination vector  (same length and type as s)
 *	d2 = vector in which to put number of false's in corresponding 
 *		segment of flag vector (unsegmented vector of ints)
 *	s = src vector (segmented same as f)
 *	f = flag vector (segmented vector of cvl_bools) same sd as s
 *	sd, n, m = segment descriptor of f and s
 */
#define make_seg_split(_name, _type)				\
	void _name(d, d2, s, f, sd, n, m, scratch)		\
	vec_p d, d2, s, f, sd, scratch;				\
	int n, m;						\
{								\
	cvl_bool *fend = (cvl_bool *)f;				\
	cvl_bool *flags = (cvl_bool *)f;			\
	int *segd = (int *)sd;					\
	int *segd_end = (int *)sd + m;				\
	_type *src = (_type *)s;				\
	_type *bot = (_type *)d;				\
	_type *top;						\
	int *count_vec = (int *)d2;				\
								\
	while (segd < segd_end) {				\
		cvl_bool *nf = flags;				\
		int count = 0;					\
		fend += *(segd++);				\
		while (nf < fend) 				\
			count += !(*(nf++));			\
		*(count_vec++) = count;				\
		top = bot + count;				\
		while (flags < fend) {				\
			if (*(flags++))				\
				*(top++) = *(src++);		\
			else					\
				*(bot++) = *(src++);		\
		}						\
		bot = top;					\
	}                               			\
}								\
	make_no_seg_scratch(_name)				\
	make_inplace(_name,INPLACE_TRUE)

make_seg_split(spl_lez, int)
make_seg_split(spl_leb, cvl_bool)
make_seg_split(spl_led, double)

/*----------------------------index (iota)-----------------*/
/* Index(len) fills an integer vector with the numbers 0, 1, ..., len-1.
 *	d = destination vector (ints)
 *	len = number of elements to write
 *	scratch = not referenced
 */

void ind_luz(d, len, scratch)
vec_p d, scratch;
int len;
{
  int *out = (int *)d;
  int next = 0;
  unroll(len, *out = next; out++; next++;)
}
make_no_scratch(ind_luz)
make_inplace(ind_luz,INPLACE_TRUE)

/* Segmented index: given a vector of segment lengths, writes into each
 * segment of the destination the numbers 0, 1, ... up to that segment's
 * length minus one.
 *	d = destination vector (ints)
 *	sd = segment descriptor, read as m segment lengths
 *	n = not referenced
 *	m = number of segments
 *	scratch = not referenced
 */
void ind_lez(d, sd, n, m, scratch)
vec_p d, sd, scratch;
int n, m;
{
  int *out = (int *)d;
  int *limit = (int *)d;	/* one past the end of the current segment */
  int *lens = (int *)sd;
  int seg;

  for (seg = 0; seg < m; seg++) {
    int k;

    limit += lens[seg];
    /* counting restarts from zero in every segment */
    for (k = 0; out < limit; k++) {
      *out++ = k;
    }
  }
}
make_no_seg_scratch(ind_lez)
make_inplace(ind_lez,INPLACE_TRUE)

/* --------------------------pack--------------------------*/
/* The pack functions take a source vector and a flag vector and return
 * in a destintation vector all those elements corresponding to True.
 * The return value of pack is the number of elements in the result vector.
 *	d = destination vector
 *	s = source vector  (same type as d)
 *	f = cvl_bool flag vector
 *	len = length of vectors
 */

#define make_pack(_name, _type)					\
	int _name (d, s, f, len, scratch)			\
	vec_p d, s, f, scratch;					\
	int len;						\
{								\
	_type *dest = (_type *)d;				\
	_type *src = (_type *)s;				\
	cvl_bool *flags = (cvl_bool *)f;			\
	unroll(len,						\
	if (*flags++) *dest++ = *src;				\
		src++;)						\
	return (dest - (_type *)d);				\
}								\
	make_no_scratch(_name)					\
	make_inplace(_name,INPLACE_TRUE)

make_pack(pck_luz, int)
make_pack(pck_lub, cvl_bool)
make_pack(pck_lud, double)

/* Segmented pack: packs a segmented vector into a destination vector and
 * produces the lengths vector that describes the segments of the result.
 * The operation is split up into two parts.  The first (pack1) takes the
 * packing flag vector and its segd and returns the lengths vector
 * describing the result of the final pack.  The second (pack2) performs
 * the pack itself.
 */

/* pack1: computes the lengths vector of the result of the pack, i.e. for
 * every segment the number of flags that are True.
 *	ds = destination; entry k receives the True count of segment k
 * 	f = boolean flags
 * 	sd = source segment descriptor, read as m segment lengths
 *	n = length of f (not referenced by the generated function)
 *	m = number of segments of f
 *	scratch = not referenced by the generated function
 */

#define make_pack1(_name)					\
	void _name (ds, f, sd, n, m, scratch)			\
	vec_p ds, f, sd, scratch;				\
	int n, m;						\
    {								\
	int *out_len = (int *)ds;				\
	int *in_len = (int *)sd;				\
	cvl_bool *fp = (cvl_bool *)f;				\
	cvl_bool *seg_stop = (cvl_bool *)f;			\
	int seg;						\
								\
	for (seg = 0; seg < m; seg++) {				\
	    int kept = 0;					\
								\
	    seg_stop += in_len[seg];				\
	    for (; fp < seg_stop; fp++) {			\
		if (*fp)					\
		    kept++;					\
	    }							\
	    out_len[seg] = kept;				\
	}							\
    }								\
    make_no_seg_scratch(_name)					\
    make_inplace(_name, INPLACE_FALSE)

/* pack2: performs the pack itself.  Walks the source one segment at a
 * time and stores, contiguously and in order, every element whose flag is
 * True.
 *	d = destination vector
 *	s = source vector (same type as d)
 *	f = cvl_bool flags, one per source element
 *	sd_s, n_s, m_s = source segment descriptor; sd_s is read as m_s
 *		segment lengths (n_s is not referenced)
 *	sd_d, n_d, m_d = destination segment descriptor (not referenced by
 *		the generated function)
 *	scratch = not referenced by the generated function
 */
#define make_pack2(_name, _type)				\
	void _name (d, s, f, sd_s, n_s , m_s, sd_d, n_d, m_d, scratch)\
	vec_p d, s, f, sd_s, sd_d, scratch;			\
	int n_s, m_s, n_d, m_d;					\
    {								\
	_type *out = (_type *)d;				\
	_type *from = (_type *)s;				\
	_type *seg_stop = (_type *)s;				\
	cvl_bool *keep = (cvl_bool *)f;				\
	int *lens = (int *)sd_s;				\
	int seg;						\
								\
	for (seg = 0; seg < m_s; seg++) {			\
	    seg_stop += lens[seg];				\
	    for (; from < seg_stop; from++, keep++) {		\
		if (*keep)					\
		    *out++ = *from;				\
	    }							\
	}							\
    }								\
    make_no_seg2_scratch(_name)					\
    make_inplace(_name, INPLACE_FALSE)

/* Instantiate the first part of segmented pack; make_pack1 takes no
 * element type because it only reads the flags and segment lengths.
 */
make_pack1(pk1_lev)

/* Instantiate the second part of segmented pack for int, cvl_bool and
 * double elements.
 */
make_pack2(pk2_lez, int)
make_pack2(pk2_leb, cvl_bool)
make_pack2(pk2_led, double)

/* -----------------------shuffle (interleave) ------------*/
/* The sfl functions take two vectors and perform a rifle shuffle
 * on their values into the destination.  This shuffle interleaves the
 * values of the vectors:
 *			d[2k] = s1[k], d[2k+1] = s2[k]
 *	d = destination vector
 *	s1, s2 = source vectors
 *	len = length of each source vector
 * Should this rewritten to store first one vector and than the other?
 * This may result in better cache performance.
 */

#define make_sfl(_name, _type)					\
	void _name(d, s1, s2, len, scratch)			\
	vec_p d, s1, s2, scratch;				\
	int len;						\
{								\
	_type *dest = (_type *)d;				\
	_type *src1 = (_type *)s1;				\
	_type *src2 = (_type *)s2;				\
    unroll(len,							\
      *(dest++) = *(src1++);					\
      *(dest++) = *(src2++);)					\
}								\
	make_no_scratch(_name)					\
	make_inplace(_name,INPLACE_TRUE)

make_sfl(sfl_luz, int)
make_sfl(sfl_lub, cvl_bool)
make_sfl(sfl_lud, double)

/*---------------------fpermute----------------------------*/
/* The fpm functions perform a select permute on the source 
 * vector according to a set of flags.
 * 	d = dest   
 *	s = source (same type as d)
 *	i = index (compatible with d)
 *	f = flags (compatible with d)
 *	len_src = length of source vector
 *	len_dest = length of dest (number T in f)
 */

#define make_fpm(_name, _type)					\
	void _name(d, s, i, f, len_src, len_dest, scratch)	\
	vec_p d, s, i, f, scratch;				\
	int len_src, len_dest;					\
    {								\
	_type *dest = (_type *)d;				\
	_type *src = (_type *)s;				\
	cvl_bool *flags = (cvl_bool *)f;			\
	int *index = (int *)i;					\
    unroll(len_src,						\
	if (*flags++) dest[*(index++)] = *src++;		\
	else {index++; src++;}					\
       )							\
    }								\
make_no2_scratch(_name)						\
make_inplace(_name, INPLACE_FALSE)


make_fpm(fpm_puz, int)
make_fpm(fpm_pub, cvl_bool)
make_fpm(fpm_pud, double)

#define make_seg_fpm(_name, _type)				\
	void _name(d, s, i, f, sd_s, n_s, m_s, sd_d, n_d, m_d, scratch)	\
	vec_p d, s, i, f, sd_s, sd_d, scratch;			\
	int n_s, m_s, n_d, m_d;					\
    {								\
	register _type *dest = (_type *)d;			\
	register _type *src = (_type *)s;			\
	_type *src_end = (_type *)s;				\
	register cvl_bool *flags = (cvl_bool *)f;		\
	register int *index = (int *)i;				\
	int *segd_src = (int *)sd_s;				\
	int *segd_src_end = segd_src + m_s;			\
	int *segd_dest = (int *) sd_d;				\
								\
	while (segd_src < segd_src_end) {			\
	    src_end += *(segd_src++);				\
	    while (src < src_end) {				\
		if (*flags++) *(dest + *(index++)) = *src++;	\
		else {index++; src++;}				\
	    }							\
	    dest += *(segd_dest++);				\
	}							\
    }								\
make_no_seg2_scratch(_name)					\
make_inplace(_name, INPLACE_FALSE)

make_seg_fpm(fpm_pez, int)
make_seg_fpm(fpm_peb, cvl_bool)
make_seg_fpm(fpm_ped, double)

/*------------------------Bfpermute-------------------------*/
/* inverse-permute with flags.  Unfilled positions in vector
 * set to 0 (int, double) or F (bools).
	d = destination
	s = source	 (same type as d)
	i = index vector (same length as d)
	f = flags	 (same length as d)
	len_src          
	len_dest
 */

#define make_bfp(_name, _type)					\
	void _name(d, s, i, f, len_src, len_dest, scratch)	\
	vec_p d, s, i, f, scratch;				\
	int len_src, len_dest;					\
    {								\
	_type *dest = (_type *)d;				\
	_type *src = (_type *)s;				\
	cvl_bool *flags = (cvl_bool *)f;			\
	int *index = (int *)i;					\
    unroll(len_dest,						\
	if (*flags++) *dest = src[*index];			\
	else *dest = (_type) 0;          			\
        index++; dest++;                                        \
       )							\
    }								\
make_no2_scratch(_name)						\
make_inplace(_name, INPLACE_FALSE)

make_bfp(bfp_puz, int)
make_bfp(bfp_pub, cvl_bool)
make_bfp(bfp_pud, double)

/* Segmented inverse-permute with flags.  The destination is first cleared
 * to all-zero bytes; then, segment by segment, every destination position
 * whose flag is True receives the source element selected by its index,
 * the index being relative to the start of the matching source segment.
 * Positions whose flag is False keep the cleared value.
 *	d = destination
 *	s = source (same type as d)
 *	i = index vector (one int consumed per destination element)
 *	f = flags (one cvl_bool consumed per destination element)
 *	sd_s, n_s, m_s = source segment descriptor; sd_s is read as m_s
 *		segment lengths (n_s is not referenced)
 *	sd_d, n_d, m_d = destination segment descriptor; sd_d is read as
 *		segment lengths, one per source segment; n_d is the number
 *		of destination elements to clear (m_d is not referenced)
 *	scratch = not referenced by the generated function
 *
 * The clear uses the standard memset (declared in <string.h>) rather than
 * the legacy bzero, and is skipped when n_d is not positive so that a
 * negative length can never be turned into a huge byte count.
 */
#define make_seg_bfp(_name, _type)				\
	void _name(d, s, i, f, sd_s, n_s, m_s, sd_d, n_d, m_d, scratch)  \
	vec_p d, s, i, f, sd_s, sd_d, scratch;			\
	int n_s, m_s, n_d, m_d;					\
	{							\
	    _type *src = (_type *)s;				\
	    _type *dest = (_type *)d;				\
	    int *segd_dest = (int *) sd_d;			\
	    int *segd_src = (int *) sd_s;			\
	    int *segd_end = segd_src + m_s;			\
	    cvl_bool *flags = (cvl_bool *)f;			\
	    cvl_bool *flags_end = flags;			\
	    int *index = (int *)i;				\
								\
	    /* unfilled positions must read as zero */		\
	    if (n_d > 0)					\
		memset(dest, 0, (size_t)n_d * sizeof(_type));	\
	    while (segd_src < segd_end) {			\
		/* flags run parallel to the destination */	\
		flags_end += *segd_dest++;			\
		while (flags < flags_end) {			\
		    if (*flags++) *dest++ = src[*index++];	\
		    else {index++; dest++;}			\
		}						\
		/* rebase indices on the next source segment */	\
		src += *(segd_src++);				\
	    }							\
	}							\
    make_no_seg2_scratch(_name)					\
    make_inplace(_name, INPLACE_FALSE)

make_seg_bfp(bfp_pez, int)
make_seg_bfp(bfp_peb, cvl_bool)
make_seg_bfp(bfp_ped, double)

/* ------------------------dpermute ----------------------------*/
/* Permute with default.  Any element not filled by the permute
 * gets set to the corresponding element of the default vector:
 * the default is first copied into dest (cpy_wuz), then the permute
 * (smp_puz) is run on top of it.  The copy is skipped when the default
 * vector is the destination itself.
 * 	d = destination
 *	s = source (same type as d)
 *	i = index vector (same length as s)
 *	v = default vector (same length as d)
 *	len_src = length passed to the permute
 *	len_dest = length passed to the copy
 *	scratch = handed through to both calls
 */
void dpe_puz(d, s, i, v, len_src, len_dest, scratch)
vec_p d, s, i, v, scratch;
int len_src, len_dest;
{
    int need_copy = (d != v);

    if (need_copy) {
	cpy_wuz(d, v, len_dest, scratch);
    }
    smp_puz(d, s, i, len_src, scratch);
}
make_inplace(dpe_puz, INPLACE_FALSE)
make_no2_scratch(dpe_puz)

/* Permute with default for cvl_bool vectors: copies the default vector v
 * into d with cpy_wub (skipped when v is d itself), then runs smp_pub on
 * top of it.
 *	len_src = length passed to the permute
 *	len_dest = length passed to the copy
 *	scratch = handed through to both calls
 */
void dpe_pub(d, s, i, v, len_src, len_dest, scratch)
vec_p d, s, i, v, scratch;
int len_src, len_dest;
{
    int need_copy = (d != v);

    if (need_copy) {
	cpy_wub(d, v, len_dest, scratch);
    }
    smp_pub(d, s, i, len_src, scratch);
}
make_inplace(dpe_pub, INPLACE_FALSE)
make_no2_scratch(dpe_pub)

/* Permute with default for double vectors: copies the default vector v
 * into d with cpy_wud (skipped when v is d itself), then runs smp_pud on
 * top of it.
 *	len_src = length passed to the permute
 *	len_dest = length passed to the copy
 *	scratch = handed through to both calls
 */
void dpe_pud(d, s, i, v, len_src, len_dest, scratch)
vec_p d, s, i, v, scratch;
int len_src, len_dest;
{
    int need_copy = (d != v);

    if (need_copy) {
	cpy_wud(d, v, len_dest, scratch);
    }
    smp_pud(d, s, i, len_src, scratch);
}
make_inplace(dpe_pud, INPLACE_FALSE)
make_no2_scratch(dpe_pud)

/* Can't do the segmented version the same way, since simple permute
 * requires the same segdes for src and dest.  (compare this to code
 * for dpe_wez).
 *
 * make_seg_dpe(_type_let, _type) defines dpe_pe<_type_let> for elements
 * of type _type.  The default vector v is copied into d with the
 * unsegmented copy cpy_wu<_type_let> over n_d elements (skipped when v
 * is d itself); then every source element is stored at (start of the
 * matching destination segment + its index).
 *	sd_s is read as m_s source segment lengths, sd_d as destination
 *	segment lengths, one per source segment; n_s and m_d are not
 *	referenced.
 */

#define make_seg_dpe(_type_let,_type)				\
void GLUE(dpe_pe,_type_let) (d, s, i, v, sd_s, n_s, m_s, sd_d, n_d, m_d, scratch)\
    vec_p d, s, i, v, sd_s, sd_d, scratch;			\
    int n_s, n_d, m_s, m_d;					\
{								\
    _type *seg_base = (_type *)d;				\
    _type *from = (_type *)s;					\
    _type *seg_stop = (_type *)s;				\
    int *idx = (int *)i;					\
    int *src_lens = (int *)sd_s;				\
    int *dest_lens = (int *)sd_d;				\
    int seg;							\
								\
    if (d != v) {						\
	GLUE(cpy_wu,_type_let) (d, v, n_d, scratch);		\
    }								\
    for (seg = 0; seg < m_s; seg++) {				\
	seg_stop += src_lens[seg];				\
	for (; from < seg_stop; from++, idx++) {		\
	    seg_base[*idx] = *from;				\
	}							\
	seg_base += dest_lens[seg];				\
    }								\
}

make_seg_dpe(z,int)
make_seg_dpe(b,cvl_bool)
make_seg_dpe(d,double)

/* The scratch requirement of each segmented dpe is whatever the
 * unsegmented copy it calls reports for n_out elements.
 */
make_inplace(dpe_pez, INPLACE_FALSE)
int dpe_pez_scratch(n_in, m_in, n_out, m_out)
int n_in, m_in, n_out, m_out;
{
    return cpy_wuz_scratch(n_out);
}

make_inplace(dpe_peb, INPLACE_FALSE)
int dpe_peb_scratch(n_in, m_in, n_out, m_out)
int n_in, m_in, n_out, m_out;
{
    return cpy_wub_scratch(n_out);
}

make_inplace(dpe_ped, INPLACE_FALSE)
int dpe_ped_scratch(n_in, m_in, n_out, m_out)
int n_in, m_in, n_out, m_out;
{
    return cpy_wud_scratch(n_out);
}

/*----------------------dfpermute----------------------------*/
/* Default permute with flags.  Any element not filled by the fpermute
 * gets set to the corresponding element of the default vector:
 * the default is first copied into dest, then the fpermute is run on top
 * of it.  The copy is skipped when the default vector is the destination
 * itself.
 * 	d = destination
 *	s = source (same type as d)
 *	i = index vector (same length as s)
 *	f = flags, handed to the fpermute
 *	v = default vector (same length as d)
 *	len_src = length of source, handed to the fpermute
 *	len_dest = length of destination, used for the copy
 *	scratch = handed through to both calls
 *
 * make_dfp(_type_let) defines dfp_pu<_type_let> in terms of
 * cpy_wu<_type_let> and fpm_pu<_type_let>.
 */
#define make_dfp(_type_let)					\
void GLUE(dfp_pu,_type_let) (d, s, i, f, v, len_src, len_dest, scratch)\
    vec_p d, s, i, f, v, scratch;				\
    int len_src, len_dest;					\
{								\
    if (d != v) {						\
	GLUE(cpy_wu,_type_let) (d, v, len_dest, scratch);	\
    }								\
    GLUE(fpm_pu,_type_let) (d, s, i, f,				\
			    len_src, len_dest, scratch);	\
}
make_dfp(z)
make_dfp(b)
make_dfp(d)

make_inplace(dfp_puz, INPLACE_FALSE)
make_no2_scratch(dfp_puz)

make_inplace(dfp_pub, INPLACE_FALSE)
make_no2_scratch(dfp_pub)

make_inplace(dfp_pud, INPLACE_FALSE)
make_no2_scratch(dfp_pud)

#define make_seg_dfp(_type_let)					\
void GLUE(dfp_pe,_type_let) (d, s, i, f, v, sd_s, n_s, m_s, sd_d, n_d, m_d, scratch)\
    vec_p d, s, i, f, v, sd_s, sd_d, scratch;			\
    int n_s, n_d, m_s, m_d;					\
{								\
    if (d != v) GLUE(cpy_wu,_type_let) (d, v, n_d, scratch);	\
    GLUE(fpm_pe,_type_let) (d, s, i, f, sd_s, n_s, m_s, sd_d, n_d, m_d, scratch);\
}

make_seg_dfp(z)
make_seg_dfp(b)
make_seg_dfp(d)

make_inplace(dfp_pez, INPLACE_FALSE)
make_no_seg2_scratch(dfp_pez)

make_inplace(dfp_peb, INPLACE_FALSE)
make_no_seg2_scratch(dfp_peb)

make_inplace(dfp_ped, INPLACE_FALSE)
make_no_seg2_scratch(dfp_ped)
