
/*@@@**************************************************************************
 ** \file  movingSumFilter_inline
 * \date   Thu Nov 12 13:56:53 EST 2009
 * \author Hernan Badino
 * \notes  
*******************************************************************************
*****          (C) COPYRIGHT Hernan Badino - All Rights Reserved          *****
******************************************************************************/

//#include "ippDefs.h"

#if defined ( _OPENMP )
 #include <omp.h>
#endif

/// Moving-sum (box) filter.
///
/// For every pixel whose mask window lies completely inside the source
/// image, the sum of the source pixels under the window is divided by
/// \a f_norm_d and stored in the destination image. The window covers
/// f_maskSize.width x f_maskSize.height pixels and is anchored at
/// ((width-1)/2, (height-1)/2), so for even sizes the window reaches one
/// pixel further to the right/bottom than to the left/top.
///
/// Only destination rows  [anchor.height, h-1-height/2] and
/// destination columns    [anchor.width,  w-1-width/2 ] are written; all
/// other destination pixels are left untouched.
///
/// The implementation keeps one running sum per image column (the sum of
/// the rows currently covered by the window) and slides a horizontal window
/// over those column sums, so the cost per output pixel does not depend on
/// the mask size. When compiled with OpenMP the image is cut into
/// horizontal bands which are processed independently, each band with its
/// own column-sum buffer.
///
/// \param f_srcImg   Source image.
/// \param fr_dstImg  Destination image; must be at least as large as the
///                   source image.
/// \param f_maskSize Size of the summation window; both dimensions must be
///                   non-zero and not larger than the source image.
/// \param f_norm_d   Divisor applied to every window sum before it is
///                   stored. It is used as given (no check against 0).
///
/// \return false if the mask is empty, if the destination is smaller than
///         the source, or if the source is smaller than the mask; true
///         otherwise.
template <class _SrcType, class _DstType>
inline bool
CMovingSumFilter<_SrcType, _DstType>::compute ( const CTypedImage<_SrcType> &f_srcImg,
                                                CTypedImage<_DstType>       &fr_dstImg,
                                                const S2D<unsigned int>     f_maskSize,
                                                const double                f_norm_d )
{
    /// An empty mask has no anchor: (0-1)/2 would wrap around in unsigned
    /// arithmetic and the band computation below would divide by zero.
    if ( f_maskSize.width  == 0 ||
         f_maskSize.height == 0 )
        return false;

    if ( fr_dstImg.getWidth()  < f_srcImg.getWidth() ||
         fr_dstImg.getHeight() < f_srcImg.getHeight() ||
         f_srcImg.getHeight() < f_maskSize.height ||
         f_srcImg.getWidth() < f_maskSize.width )
        return false;

    /// Signed copy of the mask size for loop arithmetic.
    const S2D<int> kS ( f_maskSize.width,f_maskSize.height);

    /// Number of window pixels to the left of / above the output pixel.
    const S2D<unsigned int> anchor( (f_maskSize.width-1)/2, 
                                    (f_maskSize.height-1)/2 );

    /// Number of window pixels to the right of / below the output pixel.
    const S2D<unsigned int> hKS( (f_maskSize.width)/2, 
                                 (f_maskSize.height)/2 );

    const int h_i = f_srcImg.getSize().height;
    const int w_i = f_srcImg.getSize().width;

    /// Column range of the horizontal pass. The loop over
    /// [firstCol_i, lastCol_i) runs (w_i - kS.width) times and is followed
    /// by one extra store, giving w_i - kS.width + 1 outputs per row.
    const int firstCol_i = anchor.width;
    const int lastCol_i  = (int)(w_i - hKS.width)-1;

#if defined ( _OPENMP )
    const int numThreads_i = omp_get_max_threads();

    int numP_i = numThreads_i;
    /// Reduce the number of bands if the image is not high enough to give
    /// every thread at least kS.height rows.
    if ( h_i / numThreads_i < kS.height )
        numP_i = std::min(std::max(1, h_i / kS.height ), numThreads_i);
#else
    /// Without OpenMP the whole image is processed as a single band.
    const int numP_i = 1;
#endif

    /// Nominal number of output rows per band (the last band also takes the
    /// remainder of the division).
    const int hp_i = h_i / numP_i;

    /// One zero-initialised column-sum buffer of w_i entries per band.
    _DstType * const sumsVector_p = new _DstType[w_i*numP_i]();

#if defined ( _OPENMP )
#pragma omp parallel for num_threads(numP_i) schedule(static)
#endif
    for (int p = 0; p < numP_i; ++p)
    {
        /// Source rows [rowTop_i, rowBot_i] needed by this band. The band
        /// is extended by anchor.height rows at the top and hKS.height rows
        /// at the bottom so that its output rows are exactly
        /// [p*hp_i, (p+1)*hp_i - 1] (up to the image border). Extending the
        /// top by anchor.height (not hKS.height) keeps the output rows of
        /// neighbouring bands disjoint for even mask heights as well.
        int rowTop_i = p * hp_i - (int) anchor.height;
        if ( rowTop_i < 0 )
            rowTop_i = 0;

        int rowBot_i;
        if ( p == (numP_i-1) )
            rowBot_i = h_i-1;
        else
            rowBot_i = ((p+1) * hp_i - 1) + hKS.height;

        /// Column sums of this band.
        _DstType * const sums_p = sumsVector_p + p * w_i;

        /// Initial column sums: first kS.height rows of the band. Rows are
        /// visited in the outer loop so each scanline is read sequentially;
        /// every column still accumulates its rows in ascending order.
        for (int i = rowTop_i; i < rowTop_i + kS.height; ++i)
        {
            const _SrcType * const src_p = f_srcImg.getScanline(i);

            for (int j = 0 ; j < w_i; ++j)
            {
                sums_p[j] += src_p[j];
            }
        }

        /// Slide the window down one row per iteration. "<=" is correct:
        /// the last iteration reads row i+kS.height == rowBot_i, which is
        /// still inside the band, and leaves the column sums of the final
        /// window, which is emitted after the loop.
        for (int i = rowTop_i; i <= rowBot_i-kS.height; ++i)
        {
            /// Row entering and row leaving the window.
            const _SrcType * range1_p = f_srcImg.getScanline(i+kS.height);
            const _SrcType * range2_p = f_srcImg.getScanline(i);
            _DstType *       ptrsum_p = sums_p;

            /// Horizontal window over the column sums: ptrL_p is the column
            /// leaving the window, ptrR_p the column entering it.
            const _DstType * ptrL_p = sums_p;
            const _DstType * ptrR_p = sums_p;

            /// Sum of the first kS.width column sums.
            _DstType sum = *ptrR_p++;
            for (int j = 1; j < kS.width; ++j, ++ptrR_p)
            {
                sum += *ptrR_p;
            }

            _DstType *dst_p = fr_dstImg.getScanline(i+anchor.height) + anchor.width;

            for (int j = firstCol_i; j < lastCol_i;
                 ++j, ++dst_p, ++ptrL_p, ++ptrR_p, ++range1_p, ++range2_p, ++ptrsum_p)
            {
                *dst_p = static_cast<_DstType>(sum / f_norm_d);
                sum += *ptrR_p - *ptrL_p;

                /// Column ptrsum_p == ptrL_p has just been read for the
                /// last time in this row, so it can already be advanced to
                /// the next window position.
                /// NOTE(review): the difference is evaluated in the source
                /// type; for unsigned source types as wide as unsigned int
                /// this looks like it could wrap -- confirm against the
                /// instantiations actually used.
                *ptrsum_p += *range1_p - *range2_p;
            }

            /// Last output pixel of the row.
            *dst_p = static_cast<_DstType>(sum / f_norm_d);

            /// Advance the remaining kS.width column sums.
            for (int j = 0; j < (int)kS.width; ++j, ++range1_p, ++range2_p, ++ptrsum_p)
            {
                *ptrsum_p += *range1_p - *range2_p;
            }
        }

        /// Emit the last output row of the band from the final column sums
        /// (same horizontal pass as above, without a column-sum update).
        const _DstType * ptrL_p = sums_p;
        const _DstType * ptrR_p = sums_p;

        _DstType sum = *ptrR_p++;
        for (int j = 1; j < kS.width; ++j, ++ptrR_p)
        {
            sum += *ptrR_p;
        }

        _DstType *dst_p = fr_dstImg.getScanline(rowBot_i-hKS.height) + anchor.width;

        for (int j = firstCol_i; j < lastCol_i; ++j, ++dst_p, ++ptrL_p, ++ptrR_p)
        {
            *dst_p = static_cast<_DstType>(sum / f_norm_d);
            sum += *ptrR_p - *ptrL_p;
        }
        *dst_p = static_cast<_DstType>(sum / f_norm_d);
    }

    delete [] sumsVector_p;

    return true;
}
