/*@@@**************************************************************************
* \file  normalPredictor.cpp
* \author Hernan Badino
* \date  Mon Jun 22 14:54:58 EDT 2009
* \notes 
*******************************************************************************
*****     (C) COPYRIGHT Hernan Badino - All Rights Reserved               *****
******************************************************************************/

/* INCLUDES */
#include "normalPredictor.h"
#include "sphericalRangeImage.h"
#include "drawingList.h"
#include "colorEncoding.h"
#include "stereoCamera.h"

#include "imageConversion.h"

#include "uShortImage.h"
#include "medianFilter.h"
#include "prewittFilter.h"

#include <stdio.h>
#include <stdlib.h>

#if defined ( _OPENMP )
 #include <omp.h>
#endif

#include "ippDefs.h"

#include <sys/time.h>

using namespace VIC;

/// Constructors.
CNormalPredictorOp::CNormalPredictorOp ( COperator * const f_parent_p )
        : COperator (                           f_parent_p, "NormalPredictor" ),
          GT_INVALID (                      std::numeric_limits<float>::max() ),
          m_fodMethod_e (                                        FODM_3DSPACE ),
          m_normalImg (                                                       ),
          m_kernelSize (                                                 3, 3 ),
          m_applySobel_b (                                               true ),
          m_applyGauss_b (                                              false ),
          m_rangeImg (                                                        ),
          m_bufferImg (                                                       ),
          m_gradXImg (                                                        ),
          m_gradYImg (                                                        ),
          m_colorEncGrad (        CColorEncoding::CET_HUE, S2D<float>(-1.,1.) ),
          m_encodingMode_e (                                     CEM_VERTICAL ),
          m_colorEncNormal ( CColorEncoding::CET_HUE, S2D<float>(-M_PI, M_PI) ),
          m_vectorLength_d (                                              0.2 ),
          m_normalMode_e (                                     NCM_NxCrossNy1 ),
          m_show3DNormals_b (                                           false ),
          m_show3DLocal_b (                                              true ),
          m_firstDispUV (                                                     ),
          m_sphDispU (                                                        ),
          m_sphDispV (                                                        ),
          m_auxRange (                                                        ),
          m_dispU (                                                           ),
          m_dispV (                                                           ),
          m_auxDispU (                                                        ),
          m_auxDispV (                                                        ),
          m_colorEncFirstDisp (    CColorEncoding::CET_HUE, S2D<float>(-1, 1) ),
          m_filterMF_b (                                                 true ),
          m_filterGauss_b (                                              true ),
          m_gaussIters_ui (                                                 1 )
{
    registerDrawingLists();

    CEnumParameter<EFirstOrderDispMethod_t> * fodMode_p = static_cast<CEnumParameter<EFirstOrderDispMethod_t> * > (
            ADD_ENUM_PARAMETER( "First order disparity method",
                                "Set the first order disparity method to use.",
                                EFirstOrderDispMethod_t,
                                m_fodMethod_e,
                                this,
                                FODMethod,
                                CNormalPredictorOp ) );
    
    fodMode_p -> addDescription ( FODM_3DSPACE,   "From 3D Normals" );
    fodMode_p -> addDescription ( FODM_DISPSPACE, "From Predicted Disp Img" );
    

    BEGIN_PARAMETER_GROUP("Normal computation", false, CColor::red );

      ADD_UINT2D_PARAMETER( "Kernel Size",
                            "Kernel size for the application of prewitt/sobel "
                            "filter ",
                            m_kernelSize,
                            "W", "H",
                            this,
                            KernelSize,
                            CNormalPredictorOp );

      ADD_BOOL_PARAMETER( "Apply Sobel (false=Prewitt)",
                          "Apply Sobel (true) or Prewitt filter for computing derivatives "
                          "(if the mask is 3x3, otherwise Prewitt is applied).",
                          m_applySobel_b,
                          this,
                          ApplySobel,
                          CNormalPredictorOp );

      ADD_BOOL_PARAMETER( "Apply Gauss Filter",
                          "Apply Gauss before computing gradients?",
                          m_applyGauss_b,
                          this,
                          ApplyGauss,
                          CNormalPredictorOp );

      CEnumParameter<ENormalComputationMethod_t> * compMode_p = static_cast<CEnumParameter<ENormalComputationMethod_t> * > (
              ADD_ENUM_PARAMETER( "Normal computation mode",
                                  "Set the normal vector computation mode.",
                                  ENormalComputationMethod_t,
                                  m_normalMode_e,
                                  this,
                                  NormalMode,
                                  CNormalPredictorOp ) );

      compMode_p -> addDescription ( NCM_NxCrossNy1,  "Standard (Cross Product)" );
      compMode_p -> addDescription ( NCM_NxCrossNy2,  "Standard 2.0v (Cross Product)" );
      compMode_p -> addDescription ( NCM_Jacobian1,   "Jacobian method 1" );
      compMode_p -> addDescription ( NCM_Jacobian2,   "Jacobian method 2" );
      compMode_p -> addDescription ( NCM_3DDIFF,      "Difference of 3D Points" ); 
      compMode_p -> addDescription ( NCM_Rotation,    "Rotation" );
      compMode_p -> addDescription ( NCM_LS,          "Least Squares Fitting" );

   END_PARAMETER_GROUP;
    BEGIN_PARAMETER_GROUP("Display Options", false, CColor::red );

      m_colorEncGrad.addParameters ( m_paramSet_p,
                               "Gradient Color Encoding", 
                               "Color encoding for the results of the sobel operation." );

      addDrawingListParameter ( "Left Image Normal Overlay" );
      
      CEnumParameter<EColorEncodingMode_t> * param_p = static_cast<CEnumParameter<EColorEncodingMode_t> * > (
              ADD_ENUM_PARAMETER( "Normal color encoding mode",
                                  "Set the color encoding mode for normal vectors.",
                                  EColorEncodingMode_t,
                                  m_encodingMode_e,
                                  this,
                                  ColorEncodingMode,
                                  CNormalPredictorOp ) );
      
      param_p -> addDescription ( CEM_VERTICAL,   "Vertical" );
      param_p -> addDescription ( CEM_HORIZONTAL, "Horizontal" );
      param_p -> addDescription ( CEM_NORMAL_X,   "X Normal Component" );
      param_p -> addDescription ( CEM_NORMAL_Y,   "Y Normal Component" );
      param_p -> addDescription ( CEM_NORMAL_Z,   "Z Normal Component" );
      
      m_colorEncNormal.addParameters ( m_paramSet_p,
                                       "Normal Color Encoding", 
                                       "Color encoding for the normal vectors." );

      ADD_DOUBLE_PARAMETER( "Vector Length",
                            "Length of vector for displaying normals.",
                            m_vectorLength_d,
                            this,
                            VectorLength,
                            CNormalPredictorOp );
      
      addDrawingListParameter ( "Horizontal Gradient" );
      addDrawingListParameter ( "Vertical Gradient" );
      addDrawingListParameter ( "Range/Depth Image" );
      

      ADD_LINE_SEPARATOR;

      ADD_BOOL_PARAMETER( "Show 3D normals",
                          "Show normals in 3D visualization tool.",
                          m_show3DNormals_b,
                          this,
                          Show3DNormals,
                          CNormalPredictorOp );

      ADD_BOOL_PARAMETER( "Show in local coord.",
                          "If true: show normals in local coordinate system. If "
                          "false show normals in global coordinate system.",
                          m_show3DLocal_b,
                          this,
                          Show3DLocal,
                          CNormalPredictorOp );



      ADD_LINE_SEPARATOR;

      addDrawingListParameter ( "First Order Disp U" );
      addDrawingListParameter ( "First Order Disp V" );
      m_colorEncFirstDisp.addParameters ( m_paramSet_p,
                                          "First Order Disparity", 
                                          "Color encoding for the first order disparity results." );

    END_PARAMETER_GROUP;
 
    ADD_DOUBLE_PARAMETER( "Free Parameter",
                          "",
                          m_freeParam_d,
                          this,
                          FreeParam,
                          CNormalPredictorOp );

    BEGIN_PARAMETER_GROUP("First Order Disparities", false, CColor::red );


    ADD_BOOL_PARAMETER( "Median Filter",
                        "Apply Gauss to the first order disparities.",
                        m_filterMF_b,
                        this,
                        FilterMF,
                        CNormalPredictorOp );

    ADD_BOOL_PARAMETER( "Gauss Filter",
                        "Apply Gauss to the first order disparities.",
                        m_filterGauss_b,
                        this,
                        FilterGauss,
                        CNormalPredictorOp );

    ADD_UINT_PARAMETER( "Gauss Iterations",
                        "Number of Gauss iterations.",
                        m_gaussIters_ui,
                        this,
                        GaussIters,
                        CNormalPredictorOp );

    END_PARAMETER_GROUP;
}

/// Virtual destructor.
CNormalPredictorOp::~CNormalPredictorOp ()
{
}

void CNormalPredictorOp::registerDrawingLists()
{
    /// Register drawing list.
    registerDrawingList ( "Left Image Normal Overlay",
                         S2D<int> (0, 0),
                         false );

    /// Register drawing list.
    registerDrawingList ( "Horizontal Gradient",
                         S2D<int> (0, 1),
                         false );

    /// Register drawing list.
    registerDrawingList ( "Vertical Gradient",
                         S2D<int> (1, 1),
                         false );

    /// Register drawing list.
    registerDrawingList ( "Range/Depth Image",
                         S2D<int> (1, 1),
                         false );

    /// Register drawing list.
    registerDrawingList ( "First Order Disp U",
                          S2D<int> (1, 1),
                          false );

    registerDrawingList ( "First Order Disp V",
                          S2D<int> (1, 1),
                          false );
}

/// Init event.
bool CNormalPredictorOp::initialize()
{
    
    allocateImages( );
   
    return COperator::initialize();
}

/// Reset event.
bool CNormalPredictorOp::reset()
{
    return COperator::reset();
}

bool CNormalPredictorOp::exit()
{
    return COperator::exit();
}

/// Cycle event.
bool CNormalPredictorOp::cycle()
{
    if ( m_fodMethod_e == FODM_3DSPACE )
    {
        computeFODFrom3DNormals();
    }
    else
    {
        computeFODFromDispImgPred();
    }
    
        
    registerOutput ( "First Order Disp U", &m_dispU );
    registerOutput ( "First Order Disp V", &m_dispV );
   
    return COperator::cycle();
}

void 
CNormalPredictorOp::computeFODFromDispImgPred()
{
    CFloatImage *predDispImg_p = dynamic_cast<CFloatImage *>(getInput ( "Predicted Disparity Image" ) );

    if (!predDispImg_p)
        return;

    CPrewittFilter<float, float>::computeVertical  ( *predDispImg_p,
                                                     m_dispU,
                                                     m_kernelSize );

    CPrewittFilter<float, float>::computeHorizontal( *predDispImg_p,
                                                     m_dispV,
                                                     m_kernelSize, -1 );

    maskInvalidForDisp(*predDispImg_p);

    //// Set buffers so that the last result is stored in m_dispU, m_dispV.
    CFloatImage *  buffU_p[2] = {&m_dispU, &m_auxDispU};
    CFloatImage *  buffV_p[2] = {&m_dispV, &m_auxDispV};

    int w_i = predDispImg_p->getWidth();
    int h_i = predDispImg_p->getHeight();        
    int idx_i = 0;

    for (int i = 0 ; i < (int)m_gaussIters_ui; ++i)
    {
        if (m_filterMF_b)
        {
            CMedianFilter<float,float>::compute5x5( *buffU_p[idx_i], *buffU_p[(idx_i+1)%2] );
            CMedianFilter<float,float>::compute5x5( *buffV_p[idx_i], *buffV_p[(idx_i+1)%2] );
            
            ++idx_i; idx_i%=2;
        }
        
        if (m_filterGauss_b)
        {
            IppiSize dstSize_o;
            int offset_i = 2*w_i + 2;
            dstSize_o.width  = w_i - 4;
            dstSize_o.height = h_i - 4;
        
            IppStatus ippStatus_i;

            ippStatus_i = ippiFilterGauss_32f_C1R ( buffU_p[idx_i]->getData() + offset_i, 
                                                    w_i * sizeof(float),
                                                    buffU_p[(idx_i+1)%2]->getData() + offset_i,
                                                    w_i * sizeof(float),
                                                    dstSize_o, ippMskSize5x5 );
            if (ippStatus_i != ippStsNoErr)
            {
                logger::warn("IPP failed:");
                printIPPError( ippStatus_i );
                return;
            }
        
            ippStatus_i = ippiFilterGauss_32f_C1R ( buffV_p[idx_i]->getData() + offset_i, 
                                                    w_i * sizeof(float),
                                                    buffV_p[(idx_i+1)%2]->getData() + offset_i,
                                                    w_i * sizeof(float),
                                                    dstSize_o, ippMskSize5x5 );
        
            if (ippStatus_i != ippStsNoErr)
            {
                logger::warn("IPP failed:");
                printIPPError( ippStatus_i );
                return;
            }

            ++idx_i; idx_i%=2;
        }
    }

    if (idx_i%2)
    {
        buffU_p[1] -> copyImageContentTo ( *buffU_p[0] );
        buffV_p[1] -> copyImageContentTo ( *buffV_p[0] );   
    }
    
}

void
CNormalPredictorOp::maskInvalidForDisp( const CFloatImage &f_maskImg )
{
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);

    unsigned int clearSize_ui = halfKsize.height * f_maskImg.getWidth();
    float *  ptr1_p = m_dispU.getData();
    float *  ptr2_p = m_dispV.getData();

    /// finally mask the final part of the image.
    for (int i = 0; i < (int)clearSize_ui; ++i, ++ptr1_p, ++ptr2_p)
    {
        *ptr1_p = 0;
        *ptr2_p = 0;
    }

#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)f_maskImg.getHeight()-halfKsize.height; ++i)
    {
        /// max mask size is 255.
        float *slmConcrete_p[255];
        float **slm_p = slmConcrete_p + 127;
            
        /// Set pointers to upper and lower scanlines.
        for (int k = -halfKsize.height; k <= halfKsize.height; ++k)
            slm_p[k] = f_maskImg.getScanline( i + k );

        float *slu_p = m_dispU.getScanline ( i );
        float *slv_p = m_dispV.getScanline ( i );

        /// Set left border to 0.
        for ( int j = 0; j < halfKsize.width; ++j, ++slu_p, ++slv_p )
        {
            *slu_p = 0;
            *slv_p = 0;
        }
 
        for (int j = halfKsize.width; j < (int)f_maskImg.getWidth()-halfKsize.width; ++j, ++slu_p, ++slv_p )
        {
            /// Here the distance to the invalid pixel COULD be computed to further improve the algorithm. 
            /// Once we have found that a pixel is invalid we could continue invalidating neighbours (++j) 
            /// which also are at a distance closer than the kernel window width.
            bool break_b = false;
            for (int k = -halfKsize.height; k <= halfKsize.height && !break_b; ++k)
                for (int l = j-halfKsize.width; l <= j+halfKsize.width && !break_b; ++l)
                    if (slm_p[k][l] <= 0) 
                        break_b = true;

            if ( break_b ) 
            {
                *slu_p = 0;
                *slv_p = 0;
            }
        }
            
        /// Set right border to 0.
        for ( int j = f_maskImg.getWidth()-halfKsize.height; j < (int)f_maskImg.getWidth() ; ++j, ++slu_p, ++slv_p )
        {
            *slu_p = 0;
            *slv_p = 0;
        }            
    }        

    ptr1_p = m_dispU.getScanline(f_maskImg.getHeight()-halfKsize.height);
    ptr2_p = m_dispV.getScanline(f_maskImg.getHeight()-halfKsize.height);
    /// finally mask the final part of the image.
    for (int i = 0; i < (int)clearSize_ui; ++i, ++ptr1_p, ++ptr2_p)
    {
        *ptr1_p = 0;
        *ptr2_p = 0;
    }
}


void 
CNormalPredictorOp::computeFODFrom3DNormals()
{
    const CSphericalRangeImage * sri_p = 
        dynamic_cast<CSphericalRangeImage*> ( getInput( "SRI" ) );

    if (!sri_p) return;
    
    double start_d;
    double end_d;

#if defined ( _OPENMP )
    start_d = omp_get_wtime();
#endif

    allocateImages (  );

    if (m_normalMode_e != NCM_LS )
    {
        if (m_normalMode_e != NCM_NxCrossNy2 )
        {   
            CImageConversion::convert<double, float> ( sri_p->getImage(),
                                                       m_rangeImg );
        }
        else
        {
            /// Convert double to float.
            for (int i = 0; i < (int)(*sri_p).getImage().getHeight();++i)
            {
                for (int j = 0; j < (int)(*sri_p).getImage().getWidth();++j)
                {
                    C3DVector pt1;
                    //SSphericalPointData pt2;
                        
                    (*sri_p).image2Local ( j, i, pt1 );
                        
                    m_rangeImg.getScanline(i)[j] = pt1.z();
                }
            }
        }
            
        if (m_applyGauss_b)
        {
            IppiSize roiSize;
                
            roiSize.width  = m_rangeImg.getWidth() - 2;
            roiSize.height = m_rangeImg.getHeight() - 2;
                
            IppStatus status_i = 
                ippiFilterGauss_32f_C1R ( m_rangeImg.getScanline(1)+1,
                                          m_rangeImg.getWidth() * sizeof(float),
                                          m_bufferImg.getScanline(1)+1,
                                          m_bufferImg.getWidth() * sizeof(float),
                                          roiSize,
                                          ippMskSize3x3 );

            /// Now lets check for invalid values and correct the filtered
            /// range with the original.
            int h_i = m_rangeImg.getHeight() - 1;
            int w_i = m_rangeImg.getWidth() - 1;
            for (int i = 1; i < h_i; ++i)
            {
                float * dstm1_p = m_rangeImg.getScanline(i-1);
                float * dst_p   = m_rangeImg.getScanline(i);
                float * dstp1_p = m_rangeImg.getScanline(i+1);
                
                float * src_p   = m_bufferImg.getScanline(i);

                for (int j = 1; j < w_i; ++j)
                {
                    if (dst_p[j])
                    {
                        if ( dstm1_p[j-1] && dstm1_p[j] && dstm1_p[j+1] &&
                             dst_p  [j-1] &&               dst_p  [j+1] &&
                             dstp1_p[j-1] && dstp1_p[j] && dst_p  [j+1] )
                            dst_p[j] = src_p[j];
                    }
                }
            }

            //memcpy ( m_rangeImg.getData (), m_bufferImg.getData (), 
            //         sizeof(float)*m_auxRange.getWidth()*m_auxRange.getHeight());

            if ( status_i != ippStsNoErr)
            {
                printf("IPP failed:\n");
                printIPPError( status_i );
                return;
            }
        }            

        if ( 0 && (m_kernelSize.width <= 3 && 
                   m_kernelSize.height <= 3 ) ) 
        {
            if (!calculateGradientsWithIPP()) return;
        }
        else
        {
            CPrewittFilter<float, float>::computeVertical ( m_rangeImg,
                                                            m_gradXImg,
                                                            m_kernelSize,
                                                            -sri_p->getScale().x );

            CPrewittFilter<float, float>::computeHorizontal ( m_rangeImg,
                                                              m_gradYImg,
                                                              m_kernelSize,
                                                              sri_p->getScale().y );

        }            

        maskInvalid();
        
    }

#if defined ( _OPENMP )
    end_d = omp_get_wtime();
#endif

    printf("double -> float transformation + sobel/prewitt + scale + mask is %f milliseconds\n",
           (end_d-start_d)*1000.);
                
    calculateNormals();
        
    calculateFirstOrderDisp();
}

bool
CNormalPredictorOp::calculateGradientsWithIPP()
{
    const CSphericalRangeImage * sri_p = 
        dynamic_cast<CSphericalRangeImage*> ( getInput( "SRI" ) );

    int offset_i = m_gradXImg.getWidth() * (m_kernelSize.height/2) + (m_kernelSize.width/2);

    IppStatus status;
    IppiSize dstRoiSize;
    dstRoiSize.width  = m_gradXImg.getWidth()  - m_kernelSize.width + 1;
    dstRoiSize.height = m_gradXImg.getHeight() - m_kernelSize.height + 1;
            
    float scaleX_f = 1.f, scaleY_f = 1.f;            
            
    scaleY_f = -1.f/sri_p->getScale().y;

    if ( m_applySobel_b )
    {    
        status = ippiFilterSobelHoriz_32f_C1R ( m_rangeImg.getData() + offset_i,
                                                m_rangeImg.getWidth() * sizeof(float),
                                                m_gradYImg.getData() + offset_i,
                                                m_gradYImg.getWidth() * sizeof(float),
                                                dstRoiSize );
        scaleY_f /= 8.f;
    }
    else
    {    
        status = ippiFilterPrewittHoriz_32f_C1R ( m_rangeImg.getData() + offset_i,
                                                  m_rangeImg.getWidth() * sizeof(float),
                                                  m_gradYImg.getData() + offset_i,
                                                  m_gradYImg.getWidth() * sizeof(float),
                                                  dstRoiSize );
        scaleY_f /= 6.f;
    }            
                
    scaleX_f = 1.f/sri_p->getScale().x;
                
    if ( m_applySobel_b )
    {    
        status = ippiFilterSobelVert_32f_C1R ( m_rangeImg.getData() + offset_i,
                                               m_rangeImg.getWidth() * sizeof(float),
                                               m_gradXImg.getData() + offset_i,
                                               m_gradXImg.getWidth() * sizeof(float),
                                               dstRoiSize );
        scaleX_f /= 8.;
    }
    else
    {
        status = ippiFilterPrewittVert_32f_C1R ( m_rangeImg.getData() + offset_i,
                                                 m_rangeImg.getWidth() * sizeof(float),
                                                 m_gradXImg.getData() + offset_i,
                                                 m_gradXImg.getWidth() * sizeof(float),
                                                 dstRoiSize );
        scaleX_f /= 6.;
    }

    if ( status == ippStsNoErr )
        status = ippiMulC_32f_C1IR ( scaleY_f, 
                                     m_gradYImg.getData() + offset_i,
                                     m_gradYImg.getWidth() * sizeof(float),
                                     dstRoiSize );
                
    if (status != ippStsNoErr)
    {
        logger::warn("IPP failed:");
        printIPPError( status );
        //exit(1);
        return false;
    }
                
    if ( status == ippStsNoErr )
        status = ippiMulC_32f_C1IR ( scaleX_f, 
                                     m_gradXImg.getData() + offset_i,
                                     m_gradXImg.getWidth() * sizeof(float),
                                     dstRoiSize );
                
    if (status != ippStsNoErr)
    {
        logger::warn("IPP failed:");
        printIPPError( status );
        //exit(1);
        return false;
    }
    return true;
}


void
CNormalPredictorOp::calculateFirstOrderDisp( )
{
    /// 1.- Compute first order disparities storying the result in two SRIs.
    /// 2.- Reproject the two SRIs to the camera position.
    /// 3.- Apply remapping.
    /// 4.- Filter.

    const C3DVectorImage * worldVectorImg_p = dynamic_cast<C3DVectorImage *>(getInput ( "SRI - World 3D Points" ) );
    const CStereoCamera *  camera_p = dynamic_cast<CStereoCamera *>(getInput ( "Rectified Camera" ) );
    const CVelodyne *      velodyne_p = dynamic_cast<CVelodyne*> ( getInput( "Velodyne Sensor" ) );

    const CSphericalRangeImage * sri_p = dynamic_cast<CSphericalRangeImage*> ( getInput( "SRI" ) );
    const CSphericalRangeImage * repSri_p = dynamic_cast<CSphericalRangeImage*> ( getInput( "Reprojected SRI" ) );

    if (!worldVectorImg_p || !camera_p || ! velodyne_p || ! sri_p ) 
        return;

    const float b_f = camera_p -> getBaseline();
    const float bSyx_f = b_f * camera_p -> getFu() / camera_p -> getFv();
    
    int h_i = m_normalImg.getHeight();
    int w_i = m_normalImg.getWidth();
    //// 1.- Compute first order disparities storing the result into two images.

#if defined  ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = 0 ; i < h_i; ++i)
    {
        const C3DVector *  sln_p = m_normalImg.getScanline(i);
        const C3DVector *  slw_p = worldVectorImg_p->getScanline(i);
        const double *     slr_p = sri_p -> getImage().getScanline(i);    
        C3DVector *     firstD_p = m_firstDispUV.getScanline(i);
        
        for (int j = 0 ; j < w_i; ++j, ++sln_p, ++slw_p, ++firstD_p, ++slr_p)
        {
            if (*sln_p)
                firstD_p -> set( b_f/slw_p->z() * sln_p->x()/sln_p->z(),
                                 -bSyx_f/slw_p->z() * sln_p->y()/sln_p->z(),
                                 *slr_p );
            else
                firstD_p -> set(0.,0.,0.);

            if (0)
                printf("for 3D point %f %f %f with normal %f %f %f firstD is %f %f %f\n",
                       slw_p->x(),
                       slw_p->y(),
                       slw_p->z(),
                       sln_p->x(),
                       sln_p->y(),
                       sln_p->z(),
                       firstD_p->x(),
                       firstD_p->y(),
                       firstD_p->z() );
        }
    }

    m_auxRange.clear();
    m_sphDispU.clear();
    m_sphDispV.clear();

    int h2_i = m_sphDispU.getHeight();
    int w2_i = m_sphDispU.getWidth();

    //// 2.- Reproject the two first order disps to the camera position.
    for (int i = 0 ; i < h_i; ++i)
    {
        C3DVector *  slw_p      = worldVectorImg_p -> getScanline(i);
        C3DVector *  firstD_p   = m_firstDispUV.getScanline(i);

        float *      slr_p      = m_auxRange.getData();
        float *      sphDispU_p = m_sphDispU.getData();
        float *      sphDispV_p = m_sphDispV.getData();

        SSphericalPointData sphMeas;
        
        for (int j = 0; j < w_i; ++j, ++slw_p, ++firstD_p )
        {
            if (firstD_p -> z())
            {
                double u_d, v_d;
                int    u_i, v_i;
                
                bool res;            
                res = velodyne_p -> local2Spherical ( *slw_p,
                                                      sphMeas );
                
                res &= repSri_p -> spherical2Image ( sphMeas.azimuth_d,
                                                     sphMeas.elevation_d,
                                                     u_d, v_d);
                
                if (res)
                {
                    u_i = (u_d + 0.5);
                    v_i = (v_d + 0.5);
                    
                    if ( u_i >= 0 && u_i < w2_i && 
                         v_i >= 0 && v_i < h2_i )
                    {
                        int index_i = v_i * w2_i + u_i;
                        float &range_d = slr_p[index_i];
                        
                        /// check occlusions
                        if ( !range_d ||
                             range_d > firstD_p->z() )
                        {
                            range_d = firstD_p->z();
                            sphDispU_p[index_i] = firstD_p->x();
                            sphDispV_p[index_i] = firstD_p->y();
                        }
                    }
                }
            }
        }
    }

    //// Set buffers so that the last result is stored in m_dispU, m_dispV.
    CFloatImage *  buffU_p[2] = {&m_dispU, &m_auxDispU};
    CFloatImage *  buffV_p[2] = {&m_dispV, &m_auxDispV};

    int idx_i = (m_gaussIters_ui * ((m_filterMF_b?1:0) + (m_filterGauss_b?1:0)))%2;
    

    //// 3.- Apply remapping.
    /// Reusing luts from range prediction.
    CFloatImage * lutU_p = dynamic_cast<CFloatImage*> ( getInput( "DispImg Prediction Lut U" ) ); 
    CFloatImage * lutV_p = dynamic_cast<CFloatImage*> ( getInput( "DispImg Prediction Lut V" ) ); 

    if (!lutU_p || !lutV_p)
        return;

    const int interpolation_i = IPPI_INTER_NN;//LINEAR; //IPPI_INTER_CUBIC; 
    IppiSize  srcSize_o;

    srcSize_o.width  = w2_i;
    srcSize_o.height = h2_i;

    IppiRect srcROI_o;
    srcROI_o.x      = 0;
    srcROI_o.y      = 0;
    srcROI_o.width  = srcSize_o.width;
    srcROI_o.height = srcSize_o.height;

    IppiSize dstSize_o;
    dstSize_o.width  = m_dispU.getWidth();
    dstSize_o.height = m_dispU.getHeight();

    IppStatus ippStatus_i;

    /// Remap Image.
    ippStatus_i =
        ippiRemap_32f_C1R( (Ipp32f*) m_sphDispU.getData ( ),         // src
                           srcSize_o,                                // size
                           srcROI_o.width * sizeof(float),           // step src
                           srcROI_o,                                 // roi src
                           (Ipp32f*) lutU_p->getData ( ),            // pxMap
                           dstSize_o.width * sizeof(float),          // step pxMap
                           (Ipp32f*) lutV_p->getData ( ),            // pyMap
                           dstSize_o.width * sizeof(float),          // step pyMap
                           (Ipp32f*) buffU_p[idx_i]->getData(),      // dest
                           dstSize_o.width * sizeof(float),          // step dest
                           dstSize_o,                                // roi dest
                           interpolation_i );                        // interpolation

    if (ippStatus_i != ippStsNoErr)
    {
        logger::warn("IPP failed:");
        printIPPError( ippStatus_i );
        return;
    }

    /// Remap Image.
    ippStatus_i =
        ippiRemap_32f_C1R( (Ipp32f*) m_sphDispV.getData ( ),         // src
                           srcSize_o,                                // size
                           srcROI_o.width * sizeof(float),           // step src
                           srcROI_o,                                 // roi src
                           (Ipp32f*) lutU_p->getData ( ),            // pxMap
                           dstSize_o.width * sizeof(float),          // step pxMap
                           (Ipp32f*) lutV_p->getData ( ),            // pyMap
                           dstSize_o.width * sizeof(float),          // step pyMap
                           (Ipp32f*) buffV_p[idx_i]->getData(),      // dest
                           dstSize_o.width * sizeof(float),          // step dest
                           dstSize_o,                                // roi dest
                           interpolation_i );                        // interpolation

    if (ippStatus_i != ippStsNoErr)
    {
        logger::warn("IPP failed:");
        printIPPError( ippStatus_i );
        return;
    }

    //// 4.- Filter
    /// This filtering can be applied before mapping to the m_sphDispU/V images.
    for (int i = 0 ; i < (int)m_gaussIters_ui; ++i)
    {
        if (m_filterMF_b)
        {
            CMedianFilter<float,float>::compute5x5( *buffU_p[idx_i], *buffU_p[(idx_i+1)%2] );
            CMedianFilter<float,float>::compute5x5( *buffV_p[idx_i], *buffV_p[(idx_i+1)%2] );
            
            ++idx_i; idx_i%=2;
        }
        
        if (m_filterGauss_b)
        {
            w_i = m_dispU.getWidth();
            h_i = m_dispU.getHeight();        
            
            int offset_i = 2*w_i + 2;
            dstSize_o.width  = w_i - 4;
            dstSize_o.height = h_i - 4;
        
            ippStatus_i = ippiFilterGauss_32f_C1R ( buffU_p[idx_i]->getData() + offset_i, 
                                                    w_i * sizeof(float),
                                                    buffU_p[(idx_i+1)%2]->getData() + offset_i,
                                                    w_i * sizeof(float),
                                                    dstSize_o, ippMskSize5x5 );
            if (ippStatus_i != ippStsNoErr)
            {
                logger::warn("IPP failed:");
                printIPPError( ippStatus_i );
                return;
            }
        
            ippStatus_i = ippiFilterGauss_32f_C1R ( buffV_p[idx_i]->getData() + offset_i, 
                                                    w_i * sizeof(float),
                                                    buffV_p[(idx_i+1)%2]->getData() + offset_i,
                                                    w_i * sizeof(float),
                                                    dstSize_o, ippMskSize5x5 );
        
            if (ippStatus_i != ippStsNoErr)
            {
                logger::warn("IPP failed:");
                printIPPError( ippStatus_i );
                return;
            }

            ++idx_i; idx_i%=2;
        }
    }
}


void
CNormalPredictorOp::calculateNormals()
{
    const CSphericalRangeImage * sri_p = 
        dynamic_cast<CSphericalRangeImage*> ( getInput( "SRI" ) );

    ////// ERRRROR HERE

    C3DVectorImage * localVectorPts_p = 
        dynamic_cast<C3DVectorImage *>(getInput ( "SRI - Sensor 3D Points" ) );

    if (!sri_p || !localVectorPts_p)
        return;

    double start_d;
    double end_d;
#if defined ( _OPENMP )
    start_d = omp_get_wtime();
#endif


    const CVelodyne velodyne = sri_p -> getVelodyne();
    
    C3DMatrix rotation = velodyne.getRotation();
    //rotation.transpose();
    

    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);

    std::vector<double> cosAz_v     ( m_rangeImg.getWidth(), 0. );
    std::vector<double> cos2Az_v    ( m_rangeImg.getWidth(), 0. );
    std::vector<double> sinAz_v     ( m_rangeImg.getWidth(), 0. );
    std::vector<double> tanAz_v     ( m_rangeImg.getWidth(), 0. );
    std::vector<double> sinXcosAz_v ( m_rangeImg.getWidth(), 0. );
    std::vector<C3DMatrix> rotAz_v  ( m_rangeImg.getWidth() );

    std::vector<double> cosEl_v ( m_rangeImg.getHeight(), 0. );
    std::vector<double> sinEl_v ( m_rangeImg.getHeight(), 0. );
    std::vector<double> tanEl_v ( m_rangeImg.getHeight(), 0. );

    cosAz_v.resize     ( m_rangeImg.getWidth(), 0. );
    cos2Az_v.resize    ( m_rangeImg.getWidth(), 0. );
    sinAz_v.resize     ( m_rangeImg.getWidth(), 0. );
    tanAz_v.resize     ( m_rangeImg.getWidth(), 0. );
    sinXcosAz_v.resize ( m_rangeImg.getWidth(), 0. );

    cosEl_v.resize  ( m_rangeImg.getHeight(), 0. );
    sinEl_v.resize  ( m_rangeImg.getHeight(), 0. );
    tanEl_v.resize  ( m_rangeImg.getHeight(), 0. );

    // calculate first cosinus of azimuth and store it in a luv.
    for (int j = 0 ; j < (int)m_rangeImg.getWidth(); ++j)
    {
        double angle_d = sri_p -> column2Azimuth ( j );
        
        cosAz_v[j]     = cos(angle_d);
        cos2Az_v[j]    = cosAz_v[j] * cosAz_v[j];
        sinAz_v[j]     = sin(angle_d);
        tanAz_v[j]     = sinAz_v[j] / cosAz_v[j];
        sinXcosAz_v[j] = sinAz_v[j] * cosAz_v[j];
        rotAz_v[j].loadIdentity();
        rotAz_v[j].rotateY ( angle_d );
    }   

    // calculate first cosinus of azimuth and store it in a luv.
    for (int i = 0 ; i < (int)m_rangeImg.getHeight(); ++i)
    {
        double angle_d = sri_p -> row2Elevation ( i );
        
        cosEl_v[i] = cos(angle_d);
        sinEl_v[i] = sin(angle_d);
        tanEl_v[i] = sinEl_v[i] / cosEl_v[i];
    }   

    //const double scaleAz_d = sri_p->getScale().x;
    //const double scaleEl_d = sri_p->getScale().y;
    
    m_normalImg.clear();
    
    if (0)
        printf("\ngrep1\n");

#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        float *      slx_p = m_gradXImg.getScanline ( i ) + halfKsize.width;
        float *      sly_p = m_gradYImg.getScanline ( i ) + halfKsize.width;
        float *      slr_p = m_rangeImg.getScanline ( i ) + halfKsize.width;
            
        C3DVector *  sln_p = m_normalImg.getScanline ( i ) + halfKsize.width;
        C3DVector *  slp_p = localVectorPts_p -> getScanline ( i ) + halfKsize.width;
            
        double elevation_d = sri_p -> row2Elevation  ( i );
        
        C3DMatrix rotElevation;
        rotElevation.loadIdentity();
        rotElevation.rotateX ( elevation_d );

        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; 
             ++j, ++slx_p, ++sly_p, ++slr_p, ++sln_p, ++slp_p )
        {
            if ( *slx_p == GT_INVALID ) continue;
                
            double azimuth_d = sri_p -> column2Azimuth  ( j );
            double sign_d =  cosEl_v[i]>0?1:-1;

            double &sinAz_d = sinAz_v[j];
            double &cosAz_d = cosAz_v[j];
            double &sinEl_d = sinEl_v[i];
            double &cosEl_d = cosEl_v[i];            

            switch ( m_normalMode_e )
            {
                case NCM_3DDIFF:
                {
                    double unity_d=1;
                    
                    /// Del operator
                    double aux_d = unity_d * *slr_p * cosEl_d - *sly_p * sinEl_d;
                    
                    sln_p -> set ( -(*slx_p * cosAz_d + cosEl_d * sinAz_d * aux_d),
                                   cosEl_d * (*sly_p * cosEl_d + unity_d * sinEl_d * *slr_p),
                                   (*slx_p * sinAz_d - cosEl_d * cosAz_d * aux_d ));

                    //if (sln_p -> dotProduct ( *slp_p ) > 0 )
                    //    *sln_p = - *sln_p;
                            
                    if (0) // evaluate deviation.
                        printf("%f %f %f %f %f grep1\n",
                               azimuth_d, elevation_d, 
                               sln_p->x(),
                               sln_p->y(),
                               sln_p->z() );
                    
                    sln_p -> normalize();

                    if (0)
                    {
                        printf ("DIFF (%i, %i) (%f, %f, %f)  (az: %f el: %f gradAz: %f gradEl: %f range: %f)\n",
                                i,j, 
                                sln_p->x(),
                                sln_p->y(),
                                sln_p->z(), azimuth_d, elevation_d, *slx_p, *sly_p, *slr_p );
                    }
                }
                break;

                case NCM_NxCrossNy1:
                {
                    sln_p -> set ( -*slx_p,
                                   *sly_p/cosAz_d,
                                   -*slr_p);
                    
                    sln_p->normalize();
                    
                    C3DMatrix rot1;
                    rot1.loadIdentity();
                    //C3DVector axis  = slp_p -> crossProduct ( C3DVector ( 0, 0, -1 ) );
                    C3DVector axis  = C3DVector(-slp_p->y(), slp_p->x(), 0 );
                    axis.normalize();
                    
                    double  angle_d = acos(slp_p->z() / *slr_p);
                    
                    rot1.rotateAxis( axis, angle_d );
                    
                    *sln_p = rot1 * *sln_p;   

                    if (0)
                    {
                        printf ("CROS (%i, %i) (%f, %f, %f)  (az: %f el: %f gradAz: %f gradEl: %f range: %f)\n",
                                i,j, 
                                sln_p->x(),
                                sln_p->y(),
                                sln_p->z(), azimuth_d, elevation_d, *slx_p, *sly_p, *slr_p );
                    }
                }
                break;
                
                case NCM_NxCrossNy2:
                {
                    if (0)    
                    {
                        sln_p -> set ( -*slx_p * cosAz_d,
                                       *sly_p * sign_d,
                                       -*slr_p * cosEl_d );

                    }
                    else
                    {
                        if (0)
                        {
                            C3DVector n1, n2;
                            n1.at(0) = cosAz_d;
                            n1.at(1) = 0;//sinAz_d;
                            n1.at(2) = (-*slx_p * cosAz_d)/(*slr_p * cosEl_d);
                                
                            n2.at(0) = 0;//sinEl_d;
                            n2.at(1) = cosEl_d;
                            n2.at(2) = (*sly_p * sign_d)/(*slr_p * cosAz_d);
                                
                            printf("n1 is ");
                            n1.print();
                            printf("n2 is ");
                            n2.print();
                                
                            C3DMatrix rot1, rot2;
                            rot1.loadIdentity();
                            rot2.loadIdentity();
                            rot1.rotateY( azimuth_d );
                            rot2.rotateX( elevation_d );
                            
                            //n1 = rot1 * n1;
                            //n2 = rot2 * n2;

                            *sln_p = n1.crossProduct(n2);
                        }
                        else
                        {
                            C3DVector n1, n2;
                            n1.at(0) = (-*slx_p * cosAz_d / cosEl_d + *sly_p * sinAz_d * sinEl_d * sign_d) / *slr_p;// + cosEl_d * cosEl_d * sinAz_d * cosAz_d;
                            n1.at(1) = (*sly_p * cosEl_d * sign_d) / *slr_p;// + cosEl_d * cosEl_d * sinAz_d * cosAz_d;
                            n1.at(2) = 1.;
                            
                            *sln_p = n1;
                            //(*sln_p).at(1) = -(*sln_p).at(1);
                        }
                        //if (1)
                        // {
                         //   printf ("CROS (%i, %i) (%f, %f, %f)\n",
                        //            i,j, 
                        //            sln_p->x(),
                        //            sln_p->y(),
                        //            sln_p->z() );
                        // }
                           


                    }

                    sln_p -> normalize();
                          
                }
                break;
                    
                case NCM_Jacobian1:
                {
                    sln_p -> set ( (*slr_p * *slx_p * cosAz_d * cosEl_d - cosEl_d * sinAz_d - *slr_p * *sly_p * sinAz_d * sinEl_d),
                                   -(*slr_p * *sly_p * cosEl_d - sinEl_d),
                                   (-cosAz_d * cosEl_d - *slr_p * *slx_p * cosEl_d * sinAz_d - *slr_p * *sly_p * cosAz_d * sinEl_d ));
                            
                    //sln_p -> set ( -(*slr_p * cosEl_d * (*slx_p * cosAz_d + sinAz_d)),
                    //               (*slr_p * (*sly_p * cosEl_d + ( cosAz_d - *slx_p * sinAz_d) * sinEl_d)),
                    //               -(-cosAz_d * cosEl_d + *slr_p * cosEl_d * sinAz_d + *sly_p * sinEl_d ));

                    sln_p -> normalize(); 
                                 
                    if (0)
                    {
                        printf ("JAC1 (%i, %i) (%f, %f, %f)\n",
                                i,j, 
                                sln_p->x(),
                                sln_p->y(),
                                sln_p->z() );
                    }
                             
                }
                break;
                        
                case NCM_Jacobian2:
                {
                    if ( 1 ) // orig
                    {
                        double val1_d = cosEl_d + *slr_p * *sly_p * sinEl_d;
                        
                        sln_p -> set ( *slr_p * *slx_p * cosAz_d * cosEl_d - sinAz_d * val1_d,
                                       -(*slr_p * *sly_p * cosEl_d - sinEl_d),
                                       (-*slr_p * *slx_p * cosEl_d * sinAz_d - cosAz_d * val1_d ));
                        sln_p -> normalize(); 
                    }
                    else
                    {
                        double val1_d = (-1+*slr_p) * cosEl_d - *slr_p * *sly_p * sinEl_d;
                        
                        sln_p -> set ( *slr_p * *slx_p * cosAz_d * cosEl_d + sinAz_d * val1_d,
                                       sinEl_d - *slr_p * (*sly_p * cosEl_d + sinEl_d),
                                       (-*slr_p * *slx_p * cosEl_d * sinAz_d + cosAz_d * val1_d ));
                        sln_p -> normalize(); 

                    }
                    
                    if (0)
                        printf ("JAC2 (%i, %i) (%f, %f, %f)\n",
                                i,j, 
                                sln_p->x(),
                                sln_p->y(),
                                sln_p->z() );
                }
                break;

                case NCM_Rotation:
                {
                }
                break;

                case NCM_LS:    
                {
                    int refpnt_i =  (int)((m_kernelSize.width * m_kernelSize.height))/2;
                    C3DVector plane;

                    std::vector<C3DVector> points_v;
                    for (int ii = -halfKsize.height ; ii <=  halfKsize.height; ++ii)
                    {
                        C3DVector * sl = localVectorPts_p -> getScanline ( i+ii );
                        for (int jj = -halfKsize.width ; jj <= halfKsize.width; ++jj)
                            points_v.push_back(sl[j+jj]);
                    }
                            
                    computeLeastSquarePlane ( points_v, *sln_p );
                    
                    if (sln_p -> dotProduct ( points_v[refpnt_i] ) > 0 )
                        *sln_p = - *sln_p;
                    
                    sln_p -> normalize(); 

                    if (1)
                        printf ("LS   (%i, %i) (%f, %f, %f)\n",
                                i,j, 
                                sln_p->x(),
                                sln_p->y(),
                                sln_p->z() );
                }
                break;

                default:
                    sln_p -> clear();
                    break;
            }

            *sln_p = rotation * *sln_p;

            //sln_p -> print();                    
            //printf("for (%i, %i) gradX = %f gradY = %f range = %f normal = (%f, %f, %f)\n",
            //       i, j, *slx_p / 8.f, *sly_p / 8.f, *slr_p, sln_p->x(), sln_p->y(), sln_p->z() );
        }

        //printf("For %i %i normal is %f %f %f\n",
        //       j, i, sln_p->x(),sln_p->y(),sln_p->z());            
    }

#if defined ( _OPENMP )
    end_d = omp_get_wtime();
#endif
    
    printf("Normal computation timne is %f milliseconds\n",
           (end_d-start_d)*1000.);
}

void
CNormalPredictorOp::computeLeastSquarePlane ( const std::vector<C3DVector> & f_points_v,
                                              C3DVector &                    fr_plane )
{
    double sumxx=0., sumyy=0., sumzz=0., sumxy=0., sumxz=0., sumyz=0.;
    
    C3DVector vectorSum;
    vectorSum.clear();
    
    int n = f_points_v.size();
    
    for (int i = 0; i < n; ++i)
    {
        vectorSum += f_points_v[i];
        sumxx     += f_points_v[i].x() * f_points_v[i].x();
        sumyy     += f_points_v[i].y() * f_points_v[i].y();
        sumzz     += f_points_v[i].z() * f_points_v[i].z();
        sumxy     += f_points_v[i].x() * f_points_v[i].y();
        sumxz     += f_points_v[i].x() * f_points_v[i].z();
        sumyz     += f_points_v[i].y() * f_points_v[i].z();
    }

    C3DMatrix A;
    C3DVector vectorAvg = vectorSum / n;
    
    A.at(0,0) = sumxx - vectorSum.x()/n * vectorSum.x();
    A.at(1,1) = sumyy - vectorSum.y()/n * vectorSum.y();
    A.at(2,2) = sumzz - vectorSum.z()/n * vectorSum.z();
    A.at(0,1) = A.at(1,0) = ( sumxy - vectorAvg.y() * vectorSum.x() );
    A.at(0,2) = A.at(2,0) = ( sumxz - vectorAvg.z() * vectorSum.x() );
    A.at(1,2) = A.at(2,1) = ( sumyz - vectorAvg.z() * vectorSum.y() );
    
    double m = A.trace()/3;

    C3DMatrix I;
    I.loadIdentity();

    C3DMatrix K = A - I * m;

    double q = K.determinant()/2;
 
    double p = 0;
    
    for ( int i = 0; i < 9; ++i)
        p += K.at(i) * K.at(i);

    p = p/6;
    
    double phi = 1./3. * atan( sqrt ( p*p*p - q*q ) / q );

    if ( phi < 0 )
        phi += M_PI;
        
    double eig1 = m + 2 * sqrt(p) * cos(phi);
    double eig2 = m - sqrt(p) * ( cos(phi) + sqrt(3) * sin(phi) );
    double eig3 = m - sqrt(p) * ( cos(phi) - sqrt(3) * sin(phi) );

    double eigv = std::min(std::min(eig1, eig2), eig3);
    
    A.at(0,0) -= eigv;
    A.at(1,1) -= eigv;
    A.at(2,2) -= eigv;
    
    for (int i = 0 ; i < 3; ++i)
    {
        double scale_d = A.at(i,i);
        for (int j = 0; j < 3; ++j)
            A.at(i,j) /= scale_d;

        for (int k = i+1; k < 3; ++k)
        {
            if (k != i)
            {
                double val_d = A.at(k, i);

                for (int j = i; j < 3; ++j)
                    A.at(k,j) -= val_d * A.at(i,j);
            }
        }
    }

    fr_plane.set( -A.at(0,1) * A.at(1,2) + A.at(0,2),
                  A.at(1,2),
                  -1.);

    //fr_plane = - fr_plane;
    
    //if (eigv)
    //    fr_plane *= -1;
    //A.print();
    //fr_plane.print();
    
}


void
CNormalPredictorOp::maskInvalid()
{
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);


    unsigned int clearSize_ui = halfKsize.height * m_rangeImg.getWidth();
    float *  ptr1_p = m_gradXImg.getData();
    float *  ptr2_p = m_gradYImg.getData();

    /// finally mask the final part of the image.
    for (int i = 0; i < (int)clearSize_ui; ++i, ++ptr1_p, ++ptr2_p)
    {
        *ptr1_p = GT_INVALID;
        *ptr2_p = GT_INVALID;
    }

#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        /// max mask size is 15.
        float *slmConcrete_p[15];
        float **slm_p = slmConcrete_p + 7;
            
        /// Set pointers to upper and lower scanlines.
        for (int k = -halfKsize.height; k <= halfKsize.height; ++k)
            slm_p[k] = m_rangeImg.getScanline( i + k );

        float *slx_p = m_gradXImg.getScanline ( i );
        float *sly_p = m_gradYImg.getScanline ( i );
            
        /// Set left border to 0.
        for ( int j = 0; j < halfKsize.width; ++j, ++slx_p, ++sly_p )
        {
            *slx_p = GT_INVALID;
            *sly_p = GT_INVALID;
        }
 
        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; ++j, ++slx_p, ++sly_p )
        {
            /// Here the distance to the invalid pixel COULD be computed to further improve the algorithm. 
            /// Once we have found that a pixel is invalid we could continue invalidating neighbours (++j) 
            /// which also are at a distance closer than the kernel window width.
            bool break_b = false;
            for (int k = -halfKsize.height; k <= halfKsize.height && !break_b; ++k)
                for (int l = j-halfKsize.width; l <= j+halfKsize.width && !break_b; ++l)
                    if (slm_p[k][l] <= 0) 
                        break_b = true;
            if ( break_b ) 
            {
                *slx_p = GT_INVALID;
                *sly_p = GT_INVALID;
            }
        }

        /// Set pointers to upper and lower scanlines.
        //for (int k = -halfKsize.height; k <= halfKsize.height; ++k)
        //    ++slm_p[k];
            
        /// Set right border to 0.
        for ( int j = m_rangeImg.getWidth()-halfKsize.height; j < (int)m_rangeImg.getWidth() ; ++j, ++slx_p, ++sly_p )
        {
            *slx_p = GT_INVALID;
            *sly_p = GT_INVALID;
        }            
    }        

    ptr1_p = m_gradXImg.getScanline(m_rangeImg.getHeight()-halfKsize.height);
    ptr2_p = m_gradYImg.getScanline(m_rangeImg.getHeight()-halfKsize.height);
    /// finally mask the final part of the image.
    for (int i = 0; i < (int)clearSize_ui; ++i, ++ptr1_p, ++ptr2_p)
    {
        *ptr1_p = GT_INVALID;
        *ptr2_p = GT_INVALID;
    }
}


void
CNormalPredictorOp::allocateImages ( )
{
    const CSphericalRangeImage * sri_p = 
        dynamic_cast<CSphericalRangeImage*> ( getInput( "SRI" ) );
    if (! sri_p ) return;
   
    S2D<unsigned int> size = sri_p -> getImage().getSize();

    if ( size != m_rangeImg.getSize() )
    {
        m_rangeImg.freeMemory();
        m_rangeImg.setSize( size );
        m_rangeImg.ensureAllocation();
        
        m_bufferImg.freeMemory();
        m_bufferImg.setSize( size );
        m_bufferImg.ensureAllocation();
        
        m_gradYImg.freeMemory();
        m_gradYImg = m_rangeImg;
        m_gradYImg.ensureAllocation();
        
        m_gradXImg.freeMemory();
        m_gradXImg = m_rangeImg;
        m_gradXImg.ensureAllocation();
        
        m_normalImg.freeMemory();
        m_normalImg.setSize ( size );
        m_normalImg.ensureAllocation();
        
        m_firstDispUV.freeMemory();
        m_firstDispUV.setSize ( size );
        m_firstDispUV.ensureAllocation();
    }
    
    sri_p = dynamic_cast<CSphericalRangeImage*> ( getInput( "Reprojected SRI" ) );

    if (! sri_p ) return;
    size = sri_p -> getImage().getSize();
    
    if (size != m_sphDispU.getSize())
    {
        printf("%ix%i newSize= %ix%i\n", 
               m_sphDispU.getSize().width, m_sphDispU.getSize().height,
               size.width, size.height );
        
        m_sphDispU.freeMemory();
        m_sphDispU.setSize ( size );
        m_sphDispU.ensureAllocation();
        
        m_sphDispV.freeMemory();
        m_sphDispV.setSize ( size );
        m_sphDispV.ensureAllocation();
        
        m_auxRange.freeMemory();
        m_auxRange.setSize ( size );
        m_auxRange.ensureAllocation();
    }
    
    CUShortImage * imgL_p = dynamic_cast<CUShortImage *>(getInput ( "Rectified Left Image" ) );
    if (!imgL_p) return;
    
    size = imgL_p -> getSize();

    if (size != m_dispU.getSize())
    {
        m_dispU.freeMemory();
        m_dispU.setSize ( size );
        m_dispU.ensureAllocation();
        
        m_dispV.freeMemory();
        m_dispV.setSize ( size );
        m_dispV.ensureAllocation();

        m_auxDispU.freeMemory();
        m_auxDispU.setSize ( size );
        m_auxDispU.ensureAllocation();
        
        m_auxDispV.freeMemory();
        m_auxDispV.setSize ( size );
        m_auxDispV.ensureAllocation();
    }
}


/// Show event.
bool CNormalPredictorOp::show()
{
    CDrawingList *  list1_p; //, * list2_p;

    list1_p = getDrawingList ("Vertical Gradient");
    list1_p -> clear();

    if ( list1_p -> isVisible() )
        list1_p -> addColorEncImage ( &m_gradYImg,
                                      0, 0, 
                                      800, 600, 
                                      m_colorEncGrad, 
                                      1.f,
                                      false );

    list1_p = getDrawingList ("Horizontal Gradient");
    list1_p -> clear();

    if ( list1_p -> isVisible() )
        list1_p -> addColorEncImage ( &m_gradXImg,
                                      0, 0, 
                                      800, 600, 
                                      m_colorEncGrad, 
                                      1.f,
                                      false );
    
    list1_p = getDrawingList ("Range/Depth Image");
    list1_p -> clear();

    CColorEncoding * colorEnc_p =
        dynamic_cast<CColorEncoding *>(getInput ( "SRI - Color encoding" ) );

    if ( list1_p -> isVisible() && colorEnc_p )
        list1_p -> addColorEncImage ( &m_rangeImg,
                                      0, 0, 
                                      800, 600, 
                                      *colorEnc_p, 
                                      1.f,
                                      false );
    
    list1_p = getDrawingList ("First Order Disp U");
    list1_p -> clear();

    if ( list1_p -> isVisible() )
        list1_p -> addColorEncImage ( &m_dispU,
                                      0, 0, 
                                      800, 600, 
                                      m_colorEncFirstDisp, 
                                      1.f,
                                      false );

    list1_p = getDrawingList ("First Order Disp V");
    list1_p -> clear();

    if ( list1_p -> isVisible() )
        list1_p -> addColorEncImage ( &m_dispV,
                                      0, 0, 
                                      800, 600, 
                                      m_colorEncFirstDisp, 
                                      1.f,
                                      false );

    showNormalImage();    

#if defined HAVE_QGLVIEWER
    show3D();
#endif
    //show3D();

    return COperator::show();
}

void
CNormalPredictorOp::show3D()
{
#if defined HAVE_QGLVIEWER

    
    if (m_3dViewer_p && m_show3DNormals_b)
    {
        //m_3dViewer_p->clear();
        
        C3DVectorImage * worldVectorImg_p;

        if (m_show3DLocal_b ) 
            worldVectorImg_p = dynamic_cast<C3DVectorImage *>(getInput ( "SRI - Sensor 3D Points" ) );
        else
            worldVectorImg_p = dynamic_cast<C3DVectorImage *>(getInput ( "SRI - World 3D Points" ) );

        C3DVector *  sln_p = m_normalImg.getData();

        CUShortImage  * leftImg_p = dynamic_cast<CUShortImage  *>(getInput ( "Rectified Left Image" ) );

        C3DVectorImage * leftCamVectorImg_p = 
            dynamic_cast<C3DVectorImage *>(getInput ( "SRI - Left Camera Projections" ) );

        const CSphericalRangeImage * sri_p = 
            dynamic_cast<CSphericalRangeImage*> ( getInput( "SRI" ) );

        C3DMatrix rotation;
        if ( sri_p ) 
            rotation = sri_p->getVelodyne().getRotation();
        else
            rotation.loadIdentity();

        rotation.transpose();
        
        //float *      slr_p = m_rangeImg.getScanline ( i )  + halfKsize.width;
        
        if ( worldVectorImg_p && leftCamVectorImg_p && leftImg_p )
        {
            const int widthL_i  = leftImg_p -> getWidth();
            const int heightL_i = leftImg_p -> getHeight();

            unsigned int size_ui = ( worldVectorImg_p->getWidth() * 
                                     worldVectorImg_p->getHeight() );
            
            C3DVector *  slw_p         = worldVectorImg_p   -> getScanline (0);
            C3DVector * leftCamImg_p   = leftCamVectorImg_p -> getScanline(0);

            unsigned short int *limg_p = (unsigned short int *) leftImg_p ->getData();

            for (unsigned int i = 0 ; i < size_ui; ++i, ++slw_p, ++sln_p, ++leftCamImg_p)
            {
                if ( *slw_p )
                {
                    SRgb color;
                    //printf("Adding a point (%f %f %f) %p\n",  slw_p->x()/1000 ,slw_p->y()/1000 ,slw_p->z()/1000, slw_p );
                    //*slw_p /= 1000;
                    int u, v;
                    u = (int)leftCamImg_p->x();
                    v = (int)leftCamImg_p->y();
                    
                    if (u >= 0 && u < widthL_i && 
                        v >= 0 && v < heightL_i )
                    {
                        unsigned char val = limg_p[v*widthL_i+u] / 256;
                        color = SRgb ( val, val, val );
                    } 
                    else
                        color = SRgb( 0,255,0 );

                    C3DVector vec;

                    if (!*sln_p)
                        vec.clear();
                    else
                        if ( m_show3DLocal_b ) 
                            vec = rotation * *sln_p * m_vectorLength_d;
                        else
                            vec = *sln_p *  m_vectorLength_d;

                    m_3dViewer_p -> addPoint ( *slw_p, color, vec );
                }
                
            }
        }
    }

#endif
}

void
CNormalPredictorOp::showNormalImage()
{
    CDrawingList *  list_p;
    
    list_p = getDrawingList ("Left Image Normal Overlay");

    list_p -> clear();
    list_p -> setLineColor ( SRgb ( 255, 255, 255 ) );
                
    if ( !list_p -> isVisible() ) return;
    
    const CStereoCamera * camera_p  = 
        dynamic_cast<CStereoCamera *>(getInput ( "Rectified Camera" ) );
    
    /// Vector of Sensor 3D Points.
    C3DVectorImage * leftCamVectorImg_p = 
        dynamic_cast<C3DVectorImage *>(getInput ( "SRI - Left Camera Projections" ) );
    
    C3DVectorImage * worldVectorImg_p = 
        dynamic_cast<C3DVectorImage *>(getInput ( "SRI - World 3D Points" ) );
    
    CUShortImage *  limg_p = dynamic_cast<CUShortImage *>(getInput ( "Rectified Left Image" ) );

    if ( !camera_p || 
         !leftCamVectorImg_p || 
         !worldVectorImg_p || 
         !limg_p ) return;
    
    float imgScaleFactor_f = getCastedInputObject<CIO_float, float> ("Rectification Scale Factor", 1.f );

    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);
        
#if 0 && defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        float *      slr_p = m_rangeImg.getScanline ( i )  + halfKsize.width;
        C3DVector *  sln_p = m_normalImg.getScanline ( i ) + halfKsize.width;
        C3DVector *  sli_p = leftCamVectorImg_p -> getScanline ( i ) + halfKsize.width;
        C3DVector *  slw_p = worldVectorImg_p   -> getScanline ( i ) + halfKsize.width;

        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; 
             ++j, ++sln_p, ++sli_p, ++slw_p, ++slr_p )
        {
            if ( *slr_p <= 0 || (!sln_p->x() && !sln_p->y() && !sln_p->z()) ||
                 sli_p->x() < 0 || 
                 sli_p->x() > limg_p->getWidth() || 
                 sli_p->y() < 0 || 
                 sli_p->y() > limg_p->getHeight() )
                continue;
            
            //C3DVector &vec1 = *slw_p;// + (*sln_p * m_vectorLength_d);
            C3DVector vec2 = *slw_p + (*sln_p * m_vectorLength_d);

            //C3DVector normArrow( sln_p -> z(), 0, sln_p -> x() );
            const C3DVector dir = C3DVector(0, 0, 1);
            C3DVector normArrow = sln_p->crossProduct ( dir );
            normArrow.normalize();

            if (!normArrow)
                normArrow.set(1, 0, 0);


            C3DVector arrowBase = vec2 - *sln_p * m_vectorLength_d * 0.1;
            C3DVector arrow1    = arrowBase + normArrow * m_vectorLength_d * 0.05;
            C3DVector arrow2    = arrowBase - normArrow * m_vectorLength_d * 0.05;            

            C3DVector proj2, projArrow0, projArrow1, projArrow2;
            
            bool res1_b, res2_b;
            
            //res_b = camera_p -> world2Image ( vec1, proj1 );
            //if (res_b)
            res1_b  = camera_p -> world2Image ( arrowBase, proj2 );
            res2_b  = camera_p -> world2Image ( vec2,      projArrow0 );
            res2_b &= camera_p -> world2Image ( arrow1,    projArrow1 );
            res2_b &= camera_p -> world2Image ( arrow2,    projArrow2 );

            //arrowBase.print();
            //vec2.print();
            //arrow1.print();
            //arrow2.print();
            
            if ( res1_b )
            {
                SRgb color;
                if (res2_b )
                {
                    switch ( m_encodingMode_e )
                    {
                        case CEM_VERTICAL:
                            m_colorEncNormal.colorFromValue ( atan2(sln_p->y(), sln_p->z() ), color );
                            break;
                            
                        case CEM_HORIZONTAL:
                            m_colorEncNormal.colorFromValue ( atan2(sln_p->x(), sln_p->z() ), color );
                            break;
                            
                        case CEM_NORMAL_X:
                            m_colorEncNormal.colorFromValue ( sln_p->x(), color );
                            break;
                            
                        case CEM_NORMAL_Y:
                            m_colorEncNormal.colorFromValue ( sln_p->y(), color );
                            break;
                            
                        case CEM_NORMAL_Z:
                        default:
                            m_colorEncNormal.colorFromValue ( sln_p->z(), color );
                            break;
                    }
                }
                else
                    color.set(255,255,255);
                    
                list_p -> setLineColor ( color );
                
                if (res2_b )
                    list_p -> addTriangle ( projArrow0.x() / imgScaleFactor_f, projArrow0.y() / imgScaleFactor_f,
                                            projArrow1.x() / imgScaleFactor_f, projArrow1.y() / imgScaleFactor_f,
                                            projArrow2.x() / imgScaleFactor_f, projArrow2.y() / imgScaleFactor_f );
                
                list_p -> addLine ( sli_p->x() / imgScaleFactor_f, sli_p->y() / imgScaleFactor_f,
                                    proj2.x() / imgScaleFactor_f,   proj2.y() / imgScaleFactor_f);   
            }
        }
    }
}


void 
CNormalPredictorOp::keyPressed ( CKeyEvent * f_event_p )
{
   return COperator::keyPressed ( f_event_p );    
}

void 
CNormalPredictorOp::mouseMoved ( CMouseEvent * f_event_p )
{
    const CDrawingList *  list1_p =  getDrawingList ("Horizontal Gradient");
    const CDrawingList *  list2_p =  getDrawingList ("Vertical Gradient");

    int dispWidth_i  = 800;
    int dispHeight_i = 600;
    
    const CSphericalRangeImage * sri_p = 
        dynamic_cast<CSphericalRangeImage*> ( getInput( "SRI" ) );

    if ( sri_p &&
         ( ( f_event_p -> displayScreen == list1_p->getPosition() &&
             list1_p -> isVisible() ) || 
           ( f_event_p -> displayScreen == list2_p->getPosition() &&
             list2_p -> isVisible() ) ) )
    {
        const int w_ui = m_gradXImg.getWidth();
        const int h_ui = m_gradXImg.getHeight();

        int x_i = (int)(f_event_p -> posInScreen.x / dispWidth_i  * w_ui + .5);
        int y_i = (int)(f_event_p -> posInScreen.y / dispHeight_i * h_ui + .5);
        
        if ( x_i >=0 && x_i < w_ui && 
             y_i >=0 && y_i < h_ui )
        {
            printf("(v,u) (%i, %i) El: %f rad (%f deg) Az: %f (%f deg)Range: %f Dx: %f Dy: %f\n",
                   y_i, x_i, 
                   sri_p->row2Elevation ( y_i ),
                   sri_p->row2Elevation ( y_i ) * 180 / M_PI,
                   sri_p->column2Azimuth ( x_i ),
                   sri_p->column2Azimuth ( x_i ) * 180 / M_PI,
                   sri_p->getImage().getScanline(y_i)[x_i],
                   m_gradXImg.getScanline(y_i)[x_i],
                   m_gradYImg.getScanline(y_i)[x_i] );

        }
    }

    list1_p =  getDrawingList ("First Order Disp U");
    list2_p =  getDrawingList ("First Order Disp V");

    if ( ( f_event_p -> displayScreen == list1_p->getPosition() &&
           list1_p -> isVisible() ) || 
         ( f_event_p -> displayScreen == list2_p->getPosition() &&
           list2_p -> isVisible() ) )
    {
        const int w_ui = m_dispU.getWidth();
        const int h_ui = m_dispV.getHeight();

        int x_i = (int)(f_event_p -> posInScreen.x / dispWidth_i  * w_ui + .5);
        int y_i = (int)(f_event_p -> posInScreen.y / dispHeight_i * h_ui + .5);
        
        if ( x_i >=0 && x_i < w_ui && 
             y_i >=0 && y_i < h_ui )
        {
            printf("(v,u) (%i, %i) DispU: %f DispV: %f\n",
                   y_i, x_i, 
                   m_dispU.getScanline(y_i)[x_i],
                   m_dispV.getScanline(y_i)[x_i] );

        }
    }

    return COperator::mouseMoved ( f_event_p );
}

