/*@@@**************************************************************************
* \file.cpp
* \author Hernan Badino
* \date  Mon Jun 22 14:54:58 EDT 2009
* \notes 
*******************************************************************************
*****     (C) COPYRIGHT Hernan Badino - All Rights Reserved               *****
******************************************************************************/

/* INCLUDES */
#include "normalEstFromSri.h"
#include "sphericalRangeImage.h"
#include "drawingList.h"
#include "colorEncoding.h"
#include "stereoCamera.h"

#include "imageConversion.h"

#include "uShortImage.h"
#include "medianFilter.h"
#include "prewittFilter.h"
#include "movingSumFilter.h"

#include <stdio.h>
#include <stdlib.h>

#if defined ( _OPENMP )
#include <omp.h>
#endif

#include "ippDefs.h"

#include <sys/time.h>

using namespace VIC;

/// Constructors.
CNormalEstFromSriOp::CNormalEstFromSriOp ( COperator * const f_parent_p )
        : COperator (                           f_parent_p, "NormalEstFromSri" ),
          m_gtNormalImg (                                                     ),
          m_normalImg (                                                       ),
          m_kernelSize (                                                 3, 3 ),
          m_applySobel_b (                                               true ),
          m_applyGauss_b (                                              false ),
          m_rangeImg (                                                        ),
          m_bufferImg (                                                       ),
          m_gradXImg (                                                        ),
          m_gradYImg (                                                        ),
          m_colorEncSri (         CColorEncoding::CET_HUE, S2D<float>(3.,50.) ),
          m_colorEncGrad (        CColorEncoding::CET_HUE, S2D<float>(-1.,1.) ),
          m_encodingMode_e (                                     CEM_VERTICAL ),
          m_colorEncNormal ( CColorEncoding::CET_HUE, S2D<float>(-M_PI, M_PI) ),
          m_colorEncNormalErr ( CColorEncoding::CET_HUE, S2D<float>(-M_PI/10, M_PI/10) ),
          m_vectorLength_d (                                              0.2 ),
          m_normalMode_e (                                        NCM_NABLAOP ),
          m_show3DNormals_b (                                           false ),
          m_encodeErrorIn3DVis_b (                                       true ),
          m_bgColor (                                           100, 100, 100 ),
          m_pointSize_f (                                                 1.f ),
          m_lineWidth_f (                                                 1.f ),
          m_freeParam_d (                                                  0. ),
          m_gtObjectType_e (                                      GTOT_SPHERE ),
          m_normal2Ground (                                           0.,1.,0. ),
          m_noiseStdDev_d (                                                0. ),
          m_baseDistToObj_d (                                              5. ),
          m_errorImg (                                                        ),
          m_trials_ui (                                                     1 ),
          m_gaussNoise_b (                                               true ),
          m_invM (                                                            ),
          m_bImg (                                                            ),
          m_maskImg (                                                         ),
          m_outerProductImg (                                                 ),
          m_sumOuterProductImg (                                              ),
          m_sumLocalVectorsImgs (                                             ),
          m_rotMatrix (                                                       )
{
    /// Default value for sri.
    m_sri.setRoiTopLeft(S2D<double> (-0.35f, -0.08f));
    m_sri.setRoiSize(S2D<double> (0.7f,  .53f ));
    m_sri.setScale(S2D<double> ( 0.0019, 0.005 ));

    CParameterSet * sriSubset_p = m_sri.getParameterSet("Spherical Range Image");
    sriSubset_p -> setName ( "Spherical Range Image");
    m_paramSet_p -> addSubset ( sriSubset_p );

    registerDrawingLists();

    BEGIN_PARAMETER_GROUP("Normal computation", false, CColor::red );

      ADD_UINT2D_PARAMETER( "Kernel Size",
                            "Kernel size for the application of prewitt/sobel "
                            "filter ",
                            m_kernelSize,
                            "W", "H",
                            this,
                            KernelSize,
                            CNormalEstFromSriOp );

      ADD_BOOL_PARAMETER( "Apply Sobel (false=Prewitt)",
                          "Apply Sobel (true) or Prewitt filter for computing derivatives "
                          "(if the mask is 3x3, otherwise Prewitt is applied).",
                          m_applySobel_b,
                          this,
                          ApplySobel,
                          CNormalEstFromSriOp );

      ADD_BOOL_PARAMETER( "Apply Gauss Filter",
                          "Apply Gauss before computing gradients?",
                          m_applyGauss_b,
                          this,
                          ApplyGauss,
                          CNormalEstFromSriOp );

      CEnumParameter<ENormalComputationMethod_t> * compMode_p = static_cast<CEnumParameter<ENormalComputationMethod_t> * > (
              ADD_ENUM_PARAMETER( "Normal computation mode",
                                  "Set the normal vector computation mode.",
                                  ENormalComputationMethod_t,
                                  m_normalMode_e,
                                  this,
                                  NormalMode,
                                  CNormalEstFromSriOp ) );

      compMode_p -> addDescription ( NCM_NABLAOP,       "Nabla Operator" );
      compMode_p -> addDescription ( NCM_LS,            "Least Squares Fitting" );
      compMode_p -> addDescription ( NCM_FAST_LS,       "Fast Least Squares Fitting" );
      compMode_p -> addDescription ( NCM_INV_LS,        "Inverse Least Squares Fitting" );
      compMode_p -> addDescription ( NCM_CROSS_PRODUCT, "Cross Product" );

       ADD_LINE_SEPARATOR;

      CEnumParameter<EGTObjectType_t> * objectType_p = static_cast<CEnumParameter<EGTObjectType_t> * > (
              ADD_ENUM_PARAMETER( "Object Type",
                                  "Set the type of object for testing methods.",
                                  EGTObjectType_t,
                                  m_gtObjectType_e,
                                  this,
                                  GTObjectType,
                                  CNormalEstFromSriOp ) );

      objectType_p -> addDescription ( GTOT_SPHERE,         "Sphere" );
      objectType_p -> addDescription ( GTOT_CYLINDER,       "Cylinder" );
      objectType_p -> addDescription ( GTOT_N_AGONAL_PRISM, "N-Agonal Prism" );
      objectType_p -> addDescription ( GTOT_ROAD,           "Road" );
      objectType_p -> addDescription ( GTOT_REAL,           "Real" );

      ADD_3DVEC_PARAMETER( "Normal To Ground",
                           "Normal to ground object",
                           m_normal2Ground, 
                           "x", "y", "z",
                           this,
                           Normal2Ground,
                           CNormalEstFromSriOp );
      
      ADD_DOUBLE_PARAMETER( "Base Distance To Object",
                            "Base distance to the object created.",
                            m_baseDistToObj_d,
                            this,
                            BaseDistToObj,
                            CNormalEstFromSriOp );

      ADD_DOUBLE_PARAMETER( "Noise Standard Deviation",
                            "Apply noise to the created figure?",
                            m_noiseStdDev_d,
                            this,
                            NoiseStdDev,
                            CNormalEstFromSriOp );

      ADD_BOOL_PARAMETER( "Gauss Noise?",
                          "If true apply Gauss noise. If false apply Slash noise.",
                          m_gaussNoise_b,
                          this,
                          GaussNoise,
                          CNormalEstFromSriOp );

   END_PARAMETER_GROUP;
    BEGIN_PARAMETER_GROUP("Display Options", false, CColor::red );

      m_colorEncGrad.addParameters ( m_paramSet_p,
                               "Gradient Color Encoding", 
                               "Color encoding for the results of the sobel operation." );

      CEnumParameter<EColorEncodingMode_t> * param_p = static_cast<CEnumParameter<EColorEncodingMode_t> * > (
              ADD_ENUM_PARAMETER( "Normal color encoding mode",
                                  "Set the color encoding mode for normal vectors.",
                                  EColorEncodingMode_t,
                                  m_encodingMode_e,
                                  this,
                                  ColorEncodingMode,
                                  CNormalEstFromSriOp ) );
      
      param_p -> addDescription ( CEM_VERTICAL,   "Vertical" );
      param_p -> addDescription ( CEM_HORIZONTAL, "Horizontal" );
      param_p -> addDescription ( CEM_NORMAL_X,   "X Normal Component" );
      param_p -> addDescription ( CEM_NORMAL_Y,   "Y Normal Component" );
      param_p -> addDescription ( CEM_NORMAL_Z,   "Z Normal Component" );
      
      m_colorEncNormal.addParameters ( m_paramSet_p,
                                       "Normal Color Encoding", 
                                       "Color encoding for the normal vectors." );

      ADD_DOUBLE_PARAMETER( "Vector Length",
                            "Length of vector for displaying normals.",
                            m_vectorLength_d,
                            this,
                            VectorLength,
                            CNormalEstFromSriOp );
      
      m_colorEncNormalErr.addParameters ( m_paramSet_p,
                                          "Normal Error Color Encoding", 
                                          "Color encoding for the error in the normal estimation." );

      m_colorEncSri.addParameters ( m_paramSet_p,
                                    "SRI Color Encoding", 
                                    "Color encoding for the the generated SRI." );

      addDrawingListParameter ( "Color Enc SRI" );
      addDrawingListParameter ( "Horizontal Gradient" );
      addDrawingListParameter ( "Vertical Gradient" );
      addDrawingListParameter ( "Range/Depth Image" );
      addDrawingListParameter ( "Angular Error Image" );
      addDrawingListParameter ( "Mask Image" );

      ADD_LINE_SEPARATOR;

      ADD_BOOL_PARAMETER( "Show 3D normals",
                          "Show normals in 3D visualization tool.",
                          m_show3DNormals_b,
                          this,
                          Show3DNormals,
                          CNormalEstFromSriOp );

      ADD_BOOL_PARAMETER( "Color encode error in 3D",
                          "If true, set color from error. If false set color from SRI color encoding.",
                          m_encodeErrorIn3DVis_b,
                          this,
                          EncodeErrorIn3DVis,
                          CNormalEstFromSriOp );

      ADD_RGB_PARAMETER( "Background Color",
                         "Background Color of 3D Visualization Window",
                         m_bgColor,
                         this,
                         BackgroundColor,
                         CNormalEstFromSriOp );

      ADD_FLOAT_PARAMETER( "Point Size",
                           "Point size for 3D Visualization.",
                           m_pointSize_f,
                           this,
                           PointSize,
                           CNormalEstFromSriOp );

      ADD_FLOAT_PARAMETER( "Line Width",
                           "Line width for 3D Visualization.",
                           m_lineWidth_f,
                           this,
                           LineWidth,
                           CNormalEstFromSriOp );

    END_PARAMETER_GROUP;
 
    ADD_DOUBLE_PARAMETER( "Free Parameter",
                          "",
                          m_freeParam_d,
                          this,
                          FreeParam,
                          CNormalEstFromSriOp );
}

/// Virtual destructor.
CNormalEstFromSriOp::~CNormalEstFromSriOp ()
{
}

void CNormalEstFromSriOp::registerDrawingLists()
{
    registerDrawingList ("Color Enc SRI",
                         S2D<int> (0, 2),
                         false );

    /// Register drawing list.
    registerDrawingList ( "Horizontal Gradient",
                          S2D<int> (0, 1),
                          false );
    
    /// Register drawing list.
    registerDrawingList ( "Vertical Gradient",
                          S2D<int> (1, 1),
                          false );
    
    /// Register drawing list.
    registerDrawingList ( "Range/Depth Image",
                          S2D<int> (1, 1),
                          false );

    /// Register drawing list.
    registerDrawingList ( "Angular Error Image",
                          S2D<int> (1, 1),
                          false );

    /// Register drawing list.
    registerDrawingList ( "Mask Image",
                          S2D<int> (1, 1),
                          false );
}

/// Init event.
bool CNormalEstFromSriOp::initialize()
{
    
    allocateImages( );
   
    return COperator::initialize();
}

/// Reset event.
bool CNormalEstFromSriOp::reset()
{
    return COperator::reset();
}

bool CNormalEstFromSriOp::exit()
{
    return COperator::exit();
}

/// Cycle event.
bool CNormalEstFromSriOp::cycle()
{
    printf("\n\n\n\n\n");
    const CVelodyne * velodyne_p =
        dynamic_cast<CVelodyne*> ( getInput ( "Velodyne Sensor" ) );

    if (m_gtObjectType_e == GTOT_REAL)
    {
        const CSphericalRangeImage * sri_p = 
            dynamic_cast<CSphericalRangeImage*> ( getInput( "SRI" ) );

        if ( !m_sri.hasSameFormat( *sri_p ) )
            m_sri.copyFrom ( *sri_p, true, false );
    }

    if ( velodyne_p )
    {
        CVelodyne vel = *velodyne_p;
        C3DMatrix rot; rot.loadIdentity();    
        vel.setRotation(rot);
        m_sri.setVelodyne ( *velodyne_p );
    }

    allocateImages (  );

    if ( !m_sriInv.hasSameFormat( m_sri ) )
    {
        recomputeRotImage();
        recomputeSinCosLuts();       
        m_sriInv.copyFrom ( m_sri, true, true );
    }
    
    recomputeInvMImage();

    int initk=3;
    
    static int firsttime_b = true;
    if (0 && firsttime_b)
    {
        firsttime_b = false;
        
        m_applySobel_b = false;
        
        //const int iters_i = 100;
        const int iters_i = 50;
        
        m_noiseStdDev_d = 0.;
        
        m_trials_ui = 1;
        
        for (int b = 10; b <= 10; b+=2.5)
        {
            m_baseDistToObj_d = b;
            
            for (int n = 1; n <=1; ++n)
            {
                m_gaussNoise_b = ((bool) 1-n);

                double increment_d = 0;//m_gaussNoise_b?2./(iters_i-1):0.4/(iters_i-1);
                //double increment_d = m_gaussNoise_b?1./0.005:0.001;
                
                for (int k = initk; k <= 9; ++k, ++k)
                {
                    m_kernelSize.width  = k;
                    m_kernelSize.height = k;

                    recomputeInvMImage(true);

                    int numObj_i = (b==10)?4:3;
                    //numObj_i = 1;
                    
                    //for (int o = numObj_i-1; o >=0 ; --o)
                    for (int o = 1; o >=1 ; --o)
                    {
                        m_gtObjectType_e = (EGTObjectType_t) o;
                        // check if sphere and change parameters.
                        if ( (o == 0 || o == numObj_i-1 ) )
                        {
                            if (o == 0)
                                m_sri.setRoi( S2D<double> (-0.5, -1.59),
                                              S2D<double> (6.35, 3.2) );
                            else
                                m_sri.setRoi( S2D<double> (-0.5, -0.7),
                                              S2D<double> (6.35, 1.5) );

                            allocateImages (  );
                            m_sriInv.copyFrom ( m_sri, true, true );

                            recomputeRotImage();
                            recomputeSinCosLuts();
                            recomputeInvMImage(true);
                        }

                        for (int m = 2; m <= 2; ++m)
                        {
                            if (m == 4) continue;
                            m_normalMode_e = (ENormalComputationMethod_t) (m);

                            for (int g = 1; g >=1; --g)
                            {
                                m_applyGauss_b = (bool)g;

                                for (int i = 0; i < iters_i; ++i)
                                {
                                    compute();
                                    
                                    m_noiseStdDev_d+=increment_d;
                                }
                                m_noiseStdDev_d = 0.;
                            }                            
                        }
                    }
                }
            }
            initk=3;
        }
    }
    else
    {
        m_trials_ui = 1;
        compute();
    }

    return COperator::cycle();
}

void CNormalEstFromSriOp::recomputeSinCosLuts()
{
    //std::vector<double> cosAz_v     ( m_rangeImg.getWidth(), 0. );
    //std::vector<double> sinAz_v     ( m_rangeImg.getWidth(), 0. );

    //std::vector<double> cosEl_v ( m_rangeImg.getHeight(), 0. );
    //std::vector<double> sinEl_v ( m_rangeImg.getHeight(), 0. );
    
    m_cosAz_v.resize     ( m_rangeImg.getWidth(), 0. );
    m_sinAz_v.resize     ( m_rangeImg.getWidth(), 0. );
    
    m_cosEl_v.resize  ( m_rangeImg.getHeight(), 0. );
    m_sinEl_v.resize  ( m_rangeImg.getHeight(), 0. );

    // calculate first cosinus of azimuth and store it in a luv.
    for (int j = 0 ; j < (int)m_rangeImg.getWidth(); ++j)
    {
        double angle_d = m_sri.column2Azimuth ( j );

        m_cosAz_v[j]     = cos(angle_d);
        m_sinAz_v[j]     = sin(angle_d);
    }   

    // calculate first cosinus of azimuth and store it in a luv.
    for (int i = 0 ; i < (int)m_rangeImg.getHeight(); ++i)
    {
        double angle_d = m_sri.row2Elevation ( i );
        
        m_cosEl_v[i] = cos(angle_d);
        m_sinEl_v[i] = sin(angle_d);
    }   


}

void CNormalEstFromSriOp::recomputeRotImage()
{
    printf("Recomputing rotation matrix image for nabla method\n");

    const CDoubleImage & rangeImg = m_sri.getImage();
    const int h_i = rangeImg.getHeight();
    const int w_i = rangeImg.getWidth();
    
    for (int i = 0 ; i < h_i; ++i)
    {
        C3DMatrix rot1; rot1.loadIdentity();
        rot1.rotateX( -m_sri.row2Elevation( i ) );

        for (int j = 0; j < w_i; ++j )
        {
            C3DMatrix rot2; rot2.loadIdentity();
            rot2.rotateY( m_sri.column2Azimuth( j ) );

            m_rotMatrix.getScanline(i)[j] = rot2 * rot1;
        }
    }   
}

void CNormalEstFromSriOp::recomputeInvMImage( bool f_force_b )
{
    static S2D<unsigned int> prevKernelSize(-1, -1);

    if (prevKernelSize == m_kernelSize && !f_force_b )
        return;

    printf("Recomputing matrix image for fast LS (prev ks: %ix%i new %ix%i\n",
           prevKernelSize.width, prevKernelSize.height,
           m_kernelSize.width,   m_kernelSize.height );

    prevKernelSize = m_kernelSize;
    
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);
    
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; ++j )
        {
            C3DVector point;
            C3DMatrix m;
            m.clear();
            
            for (int ii = i - halfKsize.height; ii <= i + halfKsize.height; ++ii)
            {
                for (int jj = j - halfKsize.width; jj <= j + halfKsize.width; ++jj)
                {
                    m_sri.image2Local ( jj, ii, 1.,
                                        point );

                    m -= point.outerProduct(point);
                }                
            }

            /// Is invert working?
            m.invert();
            m_invM.getScanline(i)[j] = m;
        }
    }   
}


void
CNormalEstFromSriOp::createMask()
{
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);

    unsigned int clearSize_ui = halfKsize.height * m_rangeImg.getWidth();
    unsigned char *  ptr_p = m_maskImg.getData();

    m_maskImg.fill(255);

    /// finally mask the final part of the image.
    for (int i = 0; i < (int)clearSize_ui; ++i, ++ptr_p)
    {
        *ptr_p = 0;
    }

#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        /// max mask size is 255.
        float *slmConcrete_p[256];
        float **slm_p = slmConcrete_p + 128;

        /// Set pointers to upper and lower scanlines.
        for (int k = -halfKsize.height; k <= halfKsize.height; ++k)
            slm_p[k] = m_rangeImg.getScanline( i + k );

        unsigned char *  mask_p = m_maskImg.getScanline ( i );

        /// Set left border to 0.
        for ( int j = 0; j < halfKsize.width; ++j, ++mask_p )
        {
            *mask_p = 0;
        }
 
        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; ++j, ++mask_p )
        {
            /// Here the distance to the invalid pixel COULD be computed to further improve the algorithm. 
            /// Once we have found that a pixel is invalid we could continue invalidating neighbours (++j) 
            /// which also are at a distance closer than the kernel window width.
            bool break_b = false;
            for (int k = -halfKsize.height; k <= halfKsize.height && !break_b; ++k)
                for (int l = j-halfKsize.width; l <= j+halfKsize.width && !break_b; ++l)
                    if (slm_p[k][l] <= 0) 
                        break_b = true;
            if ( break_b ) 
            {
                *mask_p = 0;
            }
        }

        /// Set pointers to upper and lower scanlines.
        //for (int k = -halfKsize.height; k <= halfKsize.height; ++k)
        //    ++slm_p[k];
            
        /// Set right border to 0.
        for ( int j = m_rangeImg.getWidth()-halfKsize.height; j < (int)m_rangeImg.getWidth() ; ++j, ++mask_p )
        {
            *mask_p = 0;
        }            
    }        

    ptr_p = m_maskImg.getData() + ( m_rangeImg.getHeight() * m_rangeImg.getWidth() ) - 
        clearSize_ui;

    /// finally mask the final part of the image.
    for (int i = 0; i < (int)clearSize_ui; ++i, ++ptr_p)
    {
        *ptr_p = 0;
    }
}


void
CNormalEstFromSriOp::compute()
{
#if defined ( _OPENMP )
#define START_CLOCK start_d = omp_get_wtime()
#define STOP_CLOCK(str)  end_d  = omp_get_wtime(); sum_d += end_d - start_d; printf("grep17 time for %s (ks: %i) is %f ms\n", str, m_kernelSize.width, (end_d - start_d)*1000. ) \
    
#else
#define START_CLOCK 
#define STOP_CLOCK(str)  
#endif

    double start_d = 0;
    double end_d = 0;
    double sum_d = 0.;

    SError_t errorData;
    errorData.clear();

    for (unsigned int i = 0; i < m_trials_ui; ++i)
    {
        /// Fill SRI with corresponding ranges.
        createSRI();

        CImageConversion::convert<double, float> ( m_sri.getImage(),
                                                   m_rangeImg );
 
        

        if (m_gtObjectType_e == GTOT_REAL)
        {
            if (0)
            {
                applyGaussAndMask(m_rangeImg);
                CImageConversion::convert<float, double> ( m_rangeImg,
                                                           m_sri.getImage() ); 
                
                createMask();
                
                /// Compute 3D points from SRI
                m_localVectorPts.clear();
                m_sri.image2Local ( m_localVectorPts );
                
                CPrewittFilter<float, float>::computeVertical ( m_rangeImg,
                                                                m_gradXImg,
                                                                //m_bufferImg,
                                                                m_kernelSize,
                                                                -m_sri.getScale().x );
                
                CPrewittFilter<float, float>::computeHorizontal ( m_rangeImg,
                                                                  m_gradYImg,
                                                                  //m_bufferImg,
                                                                  m_kernelSize,
                                                                  m_sri.getScale().y );
                
                calculateNormalsNabla(1);
            }
            else
            {
                createMask();
                
                /// Compute 3D points from SRI
                m_localVectorPts.clear();
                m_sri.image2Local ( m_localVectorPts );
                
                computeBRangeImage();

                calculateNormalsInvLS(1);
               
            }            
        }



        //CImageConversion::convert<double, float> ( m_sri.getImage(),
        //                                           m_rangeImg );
        switch( m_normalMode_e )
        {
            case NCM_NABLAOP:
            {
                if ( m_applyGauss_b )
                {
                    START_CLOCK;

                    applyGaussAndMask(m_rangeImg);

                    STOP_CLOCK("gauss and mask");

                    CImageConversion::convert<float, double> ( m_rangeImg,
                                                               m_sri.getImage() ); 
                }

                createMask();

                /// Compute 3D points from SRI
                m_localVectorPts.clear();
                m_sri.image2Local ( m_localVectorPts );
                
                START_CLOCK;
                if (1)
                {
                    CPrewittFilter<float, float>::computeVertical ( m_rangeImg,
                                                                    m_gradXImg,
                                                                    //m_bufferImg,
                                                                    m_kernelSize,
                                                                    -m_sri.getScale().x );
                    
                    CPrewittFilter<float, float>::computeHorizontal ( m_rangeImg,
                                                                      m_gradYImg,
                                                                      //m_bufferImg,
                                                                      m_kernelSize,
                                                                      m_sri.getScale().y );
                }
                else
                {
                    /*
                    static CFloatImage temp;
                    temp.freeMemory();
                    temp.setSize( m_rangeImg.getSize() );
                    temp.ensureAllocation();

                    temp.clear();
                    
                    CMovingSumFilter<float, float> movSumOp;
                    movSumOp.compute ( m_rangeImg,
                                       temp,
                                       S2D<unsigned int> ( 1,
                                                           m_kernelSize.height ),
                                       1./m_kernelSize.height );
                    
                    for (int i = m_kernelSize.height/2; i < temp.getHeight() - m_kernelSize.height/2; ++i)
                    {
                        float * sums_p = temp.getScanline(i) + m_kernelSize.width/2;
                        float * tgt_p  = m_gradXImg.getScanline(i) + m_kernelSize.width/2;
                        for (int j = m_kernelSize.width/2; j < temp.getWidth()-m_kernelSize.width/2; 
                             ++j, ++sums_p, ++tgt_p)
                        {
                            float sum_d = 0;
                            int div_i;
                            
                            for ( div_i = -((int)m_kernelSize.width)/2;div_i < 0; ++div_i)
                                sum_d += sums_p[div_i] / div_i;
                            
                            for ( ++div_i; div_i <= m_kernelSize.width/2; ++div_i)
                                sum_d += sums_p[div_i] / div_i;

                            //printf("Value to store at (%i %i) is %f / %f = %f\n",
                            //      i, j, sum_d, -m_sri.getScale().x, sum_d / -m_sri.getScale().x );
                            *tgt_p = sum_d / -m_sri.getScale().x;
                        }
                    }
                
                    
                    CPrewittFilter<float, float>::computeHorizontal ( m_rangeImg,
                                                                      m_gradYImg,
                                                                      //m_bufferImg,
                                                                      m_kernelSize,
                                                                      m_sri.getScale().y );
                    */
                }
                
                STOP_CLOCK("prewitt computation");

                START_CLOCK;

                calculateNormalsNabla();

                STOP_CLOCK("normal computation");
            }
            break;
            
            case NCM_LS:
            {
                if ( 0 && m_applyGauss_b )
                {
                    START_CLOCK;

                    applyGaussAndMask(m_rangeImg);

                    STOP_CLOCK("gauss and mask");

                    CImageConversion::convert<float, double> ( m_rangeImg,
                                                               m_sri.getImage() ); 
                }
                createMask();

                START_CLOCK;
                
                /// Compute 3D points from SRI
                m_localVectorPts.clear();
                m_sri.image2Local ( m_localVectorPts );

                STOP_CLOCK("comp of 3D points");

                START_CLOCK;
                calculateNormalsLS();
                STOP_CLOCK("normal computation");
            }
            break;

            case NCM_FAST_LS:
            {
                createMask();

                START_CLOCK;
                /// Compute 3D points from SRI
                m_localVectorPts.clear();
                m_sri.image2Local ( m_localVectorPts );
                STOP_CLOCK("comp of 3D points");

                START_CLOCK;
                computeFastLSSums();
                STOP_CLOCK("moving sum filters");

                START_CLOCK;
                calculateNormalsFastLS();
                STOP_CLOCK("normal computation");
            }
            break;

            case NCM_INV_LS:
            {
                createMask();
                
                /// Compute 3D points from SRI
                m_localVectorPts.clear();
                m_sri.image2Local ( m_localVectorPts );
                
                START_CLOCK;
                computeBRangeImage();
                STOP_CLOCK("moving sum filters");

                START_CLOCK;
                calculateNormalsInvLS();
                STOP_CLOCK("normal computation");               
            }
            break;

            case NCM_CROSS_PRODUCT:
            {
                createMask();
                
                /// Compute 3D points from SRI
                m_localVectorPts.clear();
                m_sri.image2Local ( m_localVectorPts );
                
                START_CLOCK;
                
                calculateNormalsCrossP();
                
                STOP_CLOCK("normal computation");
            }
            break;
            
            case NCM_RANDOM:
            default:
            {
                createMask();
                
                /// Compute 3D points from SRI
                m_localVectorPts.clear();
                m_sri.image2Local ( m_localVectorPts );
                
                START_CLOCK;
                
                calculateNormalsRandom();
                
                STOP_CLOCK("random");
            }
            break;
        }

        calculateError(errorData);
    }

    printf("... this line\n");

#if defined ( _OPENMP )
        printf("Total time for method %i is %f ms w: %i h: %i  trials: %i masksize %i points %i timeperpoint = %f ns ( grep16 )\n",
               (int) m_normalMode_e, 
               (sum_d)*1000.,
               m_rangeImg.getWidth(),
               m_rangeImg.getHeight(),
               m_trials_ui,
               m_kernelSize.width,
               errorData.count_ui,
               (sum_d)/errorData.count_ui * 1000000. );
#endif

    printf("grep16 for kernel size %i object %i basedist %.2f method %i and std dev %f is %f (%i points) (sum diffs %f) (ApplyGauss %i GaussNoise: %i)\n",
           m_kernelSize.width,
           (int)m_gtObjectType_e,
           m_baseDistToObj_d,
           (int)m_normalMode_e,
           m_noiseStdDev_d,
           errorData.mse_d/errorData.count_ui,
           errorData.count_ui,
           errorData.sumdiffs_d/errorData.count_ui,
           m_applyGauss_b,
           m_gaussNoise_b );
}

void
CNormalEstFromSriOp::computeBRangeImage()
{
    unsigned int size_ui = ( m_sri.getImage().getWidth() * 
                             m_sri.getImage().getHeight() );

    const double *  src_p =  m_sri.getImage().getData();
    double *  dst_p =  m_sriInv.getImage().getData();

    for (unsigned int i = 0 ; i < size_ui; ++i, ++src_p, ++dst_p)
        if (*src_p>0) *dst_p = 1./(*src_p); else *dst_p = 0;

    /// Compute 3D points from inverse sri
    m_localInvVectorPts.clear();
    m_sriInv.image2Local ( m_localInvVectorPts );

    /// Accumulate now all values within window size
    CMovingSumFilter<C3DVector, C3DVector> movSumOp;
    
    movSumOp.compute ( m_localInvVectorPts,
                       m_sumInvVectorsPts,
                       m_kernelSize,
                       1. );
}

void
CNormalEstFromSriOp::computeFastLSSums()
{
#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = 0; i < (signed)m_localInvVectorPts.getHeight(); ++i)
    {
        C3DMatrix *m_p = m_outerProductImg.getScanline(i);
        C3DVector *v_p = m_localVectorPts.getScanline(i);

        for (unsigned int j = 0; j < m_localInvVectorPts.getWidth(); ++j, ++v_p, ++m_p )
        {
            *m_p = v_p -> outerProduct( *v_p );
        }
    }

    /// Accumulate now all values within window size
    CMovingSumFilter<C3DMatrix, C3DMatrix> movSumMatrixOp;
    movSumMatrixOp.compute ( m_outerProductImg,
                             m_sumOuterProductImg,
                             m_kernelSize,
                             1. );

    /// Accumulate now all values within window size
    CMovingSumFilter<C3DVector, C3DVector> movSumVectorOp;
    movSumVectorOp.compute ( m_localVectorPts,
                             m_sumLocalVectorsImgs,
                             m_kernelSize,
                             -1. );

    if (1)
    {
#if defined ( _OPENMP )
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
        for (int i = 0; i < (signed)m_localInvVectorPts.getHeight(); ++i)
        {
            C3DMatrix *m_p = m_sumOuterProductImg.getScanline(i);
            
            for (unsigned int j = 0; j < m_localInvVectorPts.getWidth(); ++j, ++m_p )
            {
                m_p -> invert();
            }
        }
    }
}

void
CNormalEstFromSriOp::createSRI(  )
{
    const CSphericalRangeImage * sri_p = 
        dynamic_cast<CSphericalRangeImage*> ( getInput( "SRI" ) );

    m_sri.getImage().clear();
    
    const double m_height2Ground_d = 2;
    
    C3DVector n = m_normal2Ground; 
    n.normalize();

    for (unsigned int i = 0; i < m_sri.getImage().getHeight(); ++i)
    {
        for (unsigned int j = 0; j < m_sri.getImage().getWidth(); ++j)
        {
            C3DVector pnt;
            SSphericalPointData sphPt;
            
            //mode_si=0;
            switch ( m_gtObjectType_e )
            {
                case GTOT_SPHERE:
                {
                    m_sri.getImage().getScanline(i)[j] = m_baseDistToObj_d; // 
                    
                    m_sri.image2Local ( j, i, pnt );
                    pnt.normalize();
                    m_gtNormalImg.getScanline(i)[j] = -pnt;
                }
                break;

                case GTOT_CYLINDER:
                {
                    m_sri.image2Local ( j, i, 1, pnt );
                    //pnt *= (5/pnt.z());
                    m_sri.getVelodyne().local2Spherical ( pnt, sphPt );
                    
                    m_sri.getImage().getScanline(i)[j] = m_baseDistToObj_d/cos(sphPt.elevation_d); // 
                    
                    pnt.at(1) = 0;
                    pnt.normalize();
                    m_gtNormalImg.getScanline(i)[j] = -pnt;
                }
                break;
                
                case GTOT_N_AGONAL_PRISM:
                {
                    int divs_i = 3;
                    double deltaD_d = 2*M_PI/divs_i;
                    
                    m_sri.image2Local ( j, i, 1, pnt );
                    m_sri.getVelodyne().local2Spherical ( pnt, sphPt );

                    /// quadrant
                    double angle_d = atan2(pnt.z(), pnt.x());

                    if (angle_d<0)
                        angle_d += ((int)((-angle_d/(2*M_PI)))+1) * 2*M_PI;

                    double quad_d  = angle_d / (2*M_PI) * divs_i;
                    int quad_i     = (int)(quad_d<0?(M_PI-quad_d):quad_d);
                
                    quad_i%=divs_i;
                
                    /// rotate extreme points.
                    double cos1_d = cos(quad_i * deltaD_d);
                    double sin1_d = sin(quad_i * deltaD_d);
                    ++quad_i; quad_i%=divs_i;
                    double cos2_d = cos(quad_i * deltaD_d);
                    double sin2_d = sin(quad_i * deltaD_d);
                
                    C3DVector p4 ( m_baseDistToObj_d * cos1_d, 0, m_baseDistToObj_d * sin1_d );
                    C3DVector p3 ( m_baseDistToObj_d * cos2_d, 0, m_baseDistToObj_d * sin2_d );
                
                    double den_d   = (p4.z()  - p3.z()) * pnt.x() - (p4.x() - p3.x()) * pnt.z();
                    double range_d = ((p4.x() - p3.x()) * (-p3.z()) - (p4.z() - p3.z()) * (-p3.x()))/den_d;
                
                    m_sri.getImage().getScanline(i)[j] = fabs(range_d);///cos(sphPt.elevation_d);

                    C3DVector normal = (p3 + p4)/-2;
                    normal.normalize();
                
                    m_gtNormalImg.getScanline(i)[j] = normal;
                }
                break;
                
                case GTOT_ROAD:
                {
                    m_sri.image2Local ( j, i, 1, pnt );

                    C3DVector p0 (0, -m_height2Ground_d, 0 );

                    double range_d;
                    
                    if ( fabs(n * pnt) < 1.e-5) 
                        range_d = 0;
                    else
                        range_d = (n * p0) / (n * pnt);

                    if (range_d < 0) range_d = -range_d;
                    
                    m_sri.getImage().getScanline(i)[j] = range_d;
                    
                    m_gtNormalImg.getScanline(i)[j] = -n;
                }
                break;

                case GTOT_REAL:
                {
                    m_sri.getImage().getScanline(i)[j] = sri_p->getImage().getScanline(i)[j];
                    m_gtNormalImg.getScanline(i)[j].set(0,0,0);
                }
                break;
                
                default:
                {
                }
            }

            if ( m_noiseStdDev_d > 0 && m_sri.getImage().getScanline(i)[j] > 0 )
            {
                if ( m_gaussNoise_b )
                    m_sri.getImage().getScanline(i)[j] += getGaussianNoise ( 0, m_noiseStdDev_d); 
                else
                    m_sri.getImage().getScanline(i)[j] += getSlashNoise ( 0, m_noiseStdDev_d);
            }
            
            if (m_sri.getImage().getScanline(i)[j] <= 0) 
            {
                m_sri.getImage().getScanline(i)[j] = 0;
                m_gtNormalImg.getScanline(i)[j] = C3DVector(0,0,0);
                //printf("Setting %f to %i %i\n", m_sri.getImage().getScanline(i)[j],
                //       j, i);                
           }
            else
            {
                m_sri.image2Local ( j, i, m_sri.getImage().getScanline(i)[j], pnt );
            
                if (m_gtNormalImg.getScanline(i)[j].dotProduct ( pnt ) > 0 )
                    m_gtNormalImg.getScanline(i)[j] = - m_gtNormalImg.getScanline(i)[j];
            }               
        }
    }

    //C3DVector p0 (0, -m_height2Ground_d, 0 );
    //C3DVector pnt(0,-1,0);
    //double distToGround_d = (n * p0) / (n * pnt);
    //printf("Dist to Ground is %f with a normal of %f %f %f\n",
    //       distToGround_d, n.x(), n.y(), n.z());
    
 }

inline double
CNormalEstFromSriOp::getGaussianNoise ( const float  f_mean_f,
                                        const float  f_standardDeviation_f )
{
    float x_f, y_f, r_f;
    
    do
    {
        /* choose x,y in uniform square (-1,-1) to (+1,+1) */
        x_f = -1.0 + 2.0 * float(random())/float(RAND_MAX);
        y_f = -1.0 + 2.0 * float(random())/float(RAND_MAX);

        /* see if it is in the unit circle */
        r_f = x_f * x_f + y_f * y_f;
    }
    while (r_f > 1.0 || r_f == 0.0);

    /* Box-Muller transform */
    return double( f_standardDeviation_f * y_f * sqrtf(-2.0 * logf(r_f) / r_f) + f_mean_f );
}

inline double
CNormalEstFromSriOp::getSlashNoise ( const float  f_mean_f,
                                     const float  f_standardDeviation_f )
{
    double zmgnoise_d = getGaussianNoise ( 0, f_standardDeviation_f );
    
    return zmgnoise_d / (random()/double(RAND_MAX)) + f_mean_f;
}


void
CNormalEstFromSriOp::applyGaussAndMask(CFloatImage &fr_img)
{
    double start_d = 0;
    double end_d   = -1;
#if defined ( _OPENMP )
    start_d = omp_get_wtime();
#endif

            
    IppiSize roiSize;

    roiSize.width  = fr_img.getWidth() - 2;
    roiSize.height = fr_img.getHeight() - 2;

    IppStatus status_i = 
        ippiFilterGauss_32f_C1R ( fr_img.getScanline(1)+1,
                                  fr_img.getWidth() * sizeof(float),
                                  m_bufferImg.getScanline(1)+1,
                                  m_bufferImg.getWidth() * sizeof(float),
                                  roiSize,
                                  ippMskSize3x3 );

#if defined ( _OPENMP )
    end_d = omp_get_wtime();
    if (0)
        printf("Gauss computation for img size %ix%i is %f milliseconds\n",
               fr_img.getWidth(), fr_img.getHeight(),
               (end_d-start_d)*1000.);
#endif

    /// Now lets check for invalid values and correct the filtered
    /// range with the original.
    int h_i = fr_img.getHeight() - 1;
    int w_i = fr_img.getWidth() - 1;
    for (int i = 1; i < h_i; ++i)
    {
        float * dstm1_p = fr_img.getScanline(i-1);
        float * dst_p   = fr_img.getScanline(i);
        float * dstp1_p = fr_img.getScanline(i+1);
                
        float * src_p   = m_bufferImg.getScanline(i);

        for (int j = 1; j < w_i; ++j)
        {
            if (dst_p[j])
            {
                if ( dstm1_p[j-1] && dstm1_p[j] && dstm1_p[j+1] &&
                     dst_p  [j-1] &&               dst_p  [j+1] &&
                     dstp1_p[j-1] && dstp1_p[j] && dst_p  [j+1] )
                    dst_p[j] = src_p[j];
            }
        }
    }

    if ( status_i != ippStsNoErr)
    {
        printf("IPP failed:\n");
        printIPPError( status_i );
        return;
    }
}

bool
CNormalEstFromSriOp::calculateGradientsWithIPP()
{
    int offset_i = m_gradXImg.getWidth() * (m_kernelSize.height/2) + (m_kernelSize.width/2);

    IppStatus status;
    IppiSize dstRoiSize;
    dstRoiSize.width  = m_gradXImg.getWidth()  - m_kernelSize.width + 1;
    dstRoiSize.height = m_gradXImg.getHeight() - m_kernelSize.height + 1;
            
    float scaleX_f = 1.f, scaleY_f = 1.f;

    scaleY_f = -1.f/m_sri.getScale().y;

    IppiMaskSize mask = m_kernelSize.width == 3?ippMskSize3x3:ippMskSize5x5;

    if ( m_applySobel_b )
    {    
        status = ippiFilterSobelHorizMask_32f_C1R ( m_rangeImg.getData() + offset_i,
                                                    m_rangeImg.getWidth() * sizeof(float),
                                                    m_gradYImg.getData() + offset_i,
                                                    m_gradYImg.getWidth() * sizeof(float),
                                                    dstRoiSize,
                                                    mask );
        if ( m_kernelSize.width == 3)
            scaleY_f /= 8;
        else
            scaleY_f /= 128;
    }
    else
    {    
        status = ippiFilterPrewittHoriz_32f_C1R ( m_rangeImg.getData() + offset_i,
                                                  m_rangeImg.getWidth() * sizeof(float),
                                                  m_gradYImg.getData() + offset_i,
                                                  m_gradYImg.getWidth() * sizeof(float),
                                                  dstRoiSize );
        scaleY_f /= 6.f;
    }

    scaleX_f = 1.f/m_sri.getScale().x;
                
    if ( m_applySobel_b )
    {    
        status = ippiFilterSobelVertMask_32f_C1R ( m_rangeImg.getData() + offset_i,
                                                   m_rangeImg.getWidth() * sizeof(float),
                                                   m_gradXImg.getData() + offset_i,
                                                   m_gradXImg.getWidth() * sizeof(float),
                                                   dstRoiSize,
                                                   mask );
        if ( m_kernelSize.width == 3)
            scaleX_f /= 8;
        else
            scaleX_f /= 128;
    }
    else
    {
        status = ippiFilterPrewittVert_32f_C1R ( m_rangeImg.getData() + offset_i,
                                                 m_rangeImg.getWidth() * sizeof(float),
                                                 m_gradXImg.getData() + offset_i,
                                                 m_gradXImg.getWidth() * sizeof(float),
                                                 dstRoiSize );
        scaleX_f /= 6.;
    }

    if ( status == ippStsNoErr )
        status = ippiMulC_32f_C1IR ( scaleY_f, 
                                     m_gradYImg.getData() + offset_i,
                                     m_gradYImg.getWidth() * sizeof(float),
                                     dstRoiSize );
                
    if (status != ippStsNoErr)
    {
        logger::warn("IPP failed:");
        printIPPError( status );
        //exit(1);
        return false;
    }
                
    if ( status == ippStsNoErr )
        status = ippiMulC_32f_C1IR ( scaleX_f, 
                                     m_gradXImg.getData() + offset_i,
                                     m_gradXImg.getWidth() * sizeof(float),
                                     dstRoiSize );
                
    if (status != ippStsNoErr)
    {
        logger::warn("IPP failed:");
        printIPPError( status );
        //exit(1);
        return false;
    }
    return true;
}

void
CNormalEstFromSriOp::calculateNormalsNabla(int f_par_i)
{
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);
    /*
    std::vector<double> cosAz_v     ( m_rangeImg.getWidth(), 0. );
    std::vector<double> sinAz_v     ( m_rangeImg.getWidth(), 0. );

    std::vector<double> cosEl_v ( m_rangeImg.getHeight(), 0. );
    std::vector<double> sinEl_v ( m_rangeImg.getHeight(), 0. );

    cosAz_v.resize     ( m_rangeImg.getWidth(), 0. );
    sinAz_v.resize     ( m_rangeImg.getWidth(), 0. );

    cosEl_v.resize  ( m_rangeImg.getHeight(), 0. );
    sinEl_v.resize  ( m_rangeImg.getHeight(), 0. );

    // calculate first cosinus of azimuth and store it in a luv.
    for (int j = 0 ; j < (int)m_rangeImg.getWidth(); ++j)
    {
        double angle_d = m_sri.column2Azimuth ( j );

        cosAz_v[j]     = cos(angle_d);
        sinAz_v[j]     = sin(angle_d);
    }   

    // calculate first cosinus of azimuth and store it in a luv.
    for (int i = 0 ; i < (int)m_rangeImg.getHeight(); ++i)
    {
        double angle_d = m_sri.row2Elevation ( i );
        
        cosEl_v[i] = cos(angle_d);
        sinEl_v[i] = sin(angle_d);
    }   
    */
    //const double scaleAz_d = m_sri.getScale().x;
    //const double scaleEl_d = m_sri.getScale().y;
    
    if  (f_par_i == 1)
        m_gtNormalImg.clear();
    else
        m_normalImg.clear();

#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        float *      slx_p = m_gradXImg.getScanline ( i ) + halfKsize.width;
        float *      sly_p = m_gradYImg.getScanline ( i ) + halfKsize.width;
        float *      slr_p = m_rangeImg.getScanline ( i ) + halfKsize.width;
        //C3DVector *  slp_p = m_localVectorPts.getScanline ( i ) + halfKsize.width;

        unsigned char * mask_p = m_maskImg.getScanline ( i ) + halfKsize.width;

        C3DVector *  sln_p;
        if  (f_par_i == 1)
            sln_p = m_gtNormalImg.getScanline ( i ) + halfKsize.width;
        else
            sln_p = m_normalImg.getScanline ( i ) + halfKsize.width;
        
        const double &sinEl_d = m_sinEl_v[i];
        const double &cosEl_d = m_cosEl_v[i];

        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; 
             ++j, ++slx_p, ++sly_p, ++slr_p, ++sln_p, ++mask_p )
        {
            if ( !*mask_p ) continue;
                
            const double &sinAz_d = m_sinAz_v[j];
            const double &cosAz_d = m_cosAz_v[j];

            if (1)
            {
                /// Del operator
                double aux_d = *slr_p * cosEl_d - *sly_p * sinEl_d;
                
                sln_p -> set ( -(*slx_p * cosAz_d + cosEl_d * sinAz_d * aux_d),
                               cosEl_d * (*sly_p * cosEl_d + sinEl_d * *slr_p),
                               (*slx_p * sinAz_d - cosEl_d * cosAz_d * aux_d ));
                
                //if (sln_p -> dotProduct ( *slp_p ) > 0 )
                //    *sln_p = - *sln_p;                
            }
            else
            {
                C3DVector v;
                v.set(*slx_p / cosEl_d, *sly_p,  *slr_p );
                *sln_p = m_rotMatrix.getScanline(i)[j] * v;
                sln_p->at(0) = -sln_p->at(0);
                sln_p->at(3) = -sln_p->at(3);
            }
            
            sln_p -> normalize();
            
            //if (sln_p -> dotProduct ( *slp_p ) > 0 )
            //    *sln_p = - *sln_p;            
        }
    }
}

void
CNormalEstFromSriOp::calculateNormalsLS()
{
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);

    m_normalImg.clear();
    
#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        unsigned char * mask_p = m_maskImg.getScanline ( i ) + halfKsize.width;

        C3DVector *  sln_p = m_normalImg.getScanline ( i ) + halfKsize.width;
        C3DVector *  slp_p = m_localVectorPts.getScanline ( i ) + halfKsize.width;
            
        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; 
             ++j, ++sln_p, ++slp_p, ++mask_p )
        {
            if ( !*mask_p ) continue;
                
            //int refpnt_i =  (int)((m_kernelSize.width * m_kernelSize.height))/2;
            C3DVector plane;
            
            std::vector<C3DVector> points_v;
            for (int ii = -halfKsize.height ; ii <=  halfKsize.height; ++ii)
            {
                C3DVector * sl = m_localVectorPts.getScanline ( i+ii );
                for (int jj = -halfKsize.width ; jj <= halfKsize.width; ++jj)
                    points_v.push_back(sl[j+jj]);
            }
            
            computeLeastSquarePlane ( points_v, *sln_p );
            
            //if (sln_p -> dotProduct ( points_v[refpnt_i] ) > 0 )
            //    *sln_p = - *sln_p;

            sln_p -> normalize();            
        }
    }
}

void
CNormalEstFromSriOp::calculateNormalsFastLS()
{
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);

    m_normalImg.clear();
    
#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        unsigned char * mask_p = m_maskImg.getScanline ( i ) + halfKsize.width;

        C3DVector *  sln_p = m_normalImg.getScanline ( i ) + halfKsize.width;
        //C3DVector *  slp_p = m_localVectorPts.getScanline ( i ) + halfKsize.width;

        C3DVector *  vecB_p = m_sumLocalVectorsImgs.getScanline ( i ) + halfKsize.width;
        C3DMatrix *  matM_p = m_sumOuterProductImg.getScanline ( i ) + halfKsize.width;
        
        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; 
             ++j, ++vecB_p, ++matM_p, ++sln_p, ++mask_p )
        {
            if ( !*mask_p ) continue;

            if (1)
            {
                *sln_p = *matM_p * *vecB_p;
                //testConditioning(*matM_p);
                //vecB_p->print();
            }
            else
            {
                C3DVector vectorB; 
                C3DMatrix matrixM;
                vectorB.clear();
                matrixM.clear();
                
                for (int ii = -halfKsize.height ; ii <=  halfKsize.height; ++ii)
                {
                    for (int jj = -halfKsize.width ; jj <= halfKsize.width; ++jj)
                    {
                        C3DVector &vec = m_localVectorPts.getScanline ( i+ii ) [j+jj];
                        vectorB -= vec;
                        matrixM += vec.outerProduct(vec);
                    }
                }
                
                vectorB.print();
                m_sumLocalVectorsImgs.getScanline(i)[j].print();
                matrixM.invert();

                *sln_p = matrixM * vectorB;
            }

            sln_p -> normalize();
                    
            //if (sln_p -> dotProduct ( *slp_p ) > 0 )
            //    *sln_p = - *sln_p;
        }
    }
}

void
CNormalEstFromSriOp::calculateNormalsInvLS(int f_par_i)
{
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);

   if  (f_par_i == 1)
        m_gtNormalImg.clear();
    else
        m_normalImg.clear();
    
#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        unsigned char * mask_p = m_maskImg.getScanline ( i ) + halfKsize.width;

        C3DVector *  sln_p;
        if  (f_par_i == 1)
            sln_p = m_gtNormalImg.getScanline ( i ) + halfKsize.width;
        else
            sln_p = m_normalImg.getScanline ( i ) + halfKsize.width;

        C3DVector *  vecB_p = m_sumInvVectorsPts.getScanline ( i ) + halfKsize.width;
        C3DMatrix *  matM_p = m_invM.getScanline ( i ) + halfKsize.width;
        //C3DVector *  slp_p = m_localVectorPts.getScanline ( i ) + halfKsize.width;

        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; 
             ++j, ++vecB_p, ++matM_p, ++sln_p, ++mask_p )
        {
            if ( !*mask_p ) {continue; }

            *sln_p = *matM_p * *vecB_p;
            sln_p -> normalize();
            
            //printf("for %i %i: ", i, j);
            //sln_p -> print();
            //if (sln_p -> dotProduct ( *slp_p ) > 0 )
            //    *sln_p = - *sln_p;

            //vectorB.clear();
            /*
              for (int ii = -halfKsize.height ; ii <=  halfKsize.height; ++ii)
              {
              for (int jj = -halfKsize.width ; jj <= halfKsize.width; ++jj)
              vectorB += m_localInvVectorPts.getScanline ( i+ii ) [j+jj];
              }
            */
            //printf("vectorB:"); vectorB.print();
            //printf("m_sumInvVectorsPts"); m_sumInvVectorsPts.getScanline(i)[j].print();                    
            
            //if (sln_p -> dotProduct ( *slp_p ) > 0 )
            //    *sln_p = - *sln_p;
        }
    }
}

void
CNormalEstFromSriOp::calculateNormalsRandom()
{
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);

    m_normalImg.clear();
    
#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        unsigned char * mask_p = m_maskImg.getScanline ( i ) + halfKsize.width;
        C3DVector *  sln_p = m_normalImg.getScanline ( i ) + halfKsize.width;
        C3DVector *  slp_p = m_localVectorPts.getScanline ( i ) + halfKsize.width;

        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; 
             ++j, ++sln_p, ++mask_p, ++slp_p )
        {
            if ( !*mask_p ) continue;

            *sln_p = C3DVector( random()-RAND_MAX/2, 
                                random()-RAND_MAX/2, 
                                random()-RAND_MAX/2 );

            sln_p -> normalize();            

            if (sln_p -> dotProduct ( *slp_p ) > 0 )
                *sln_p = - *sln_p;

            sln_p -> normalize();            
        }
    }
}

void
CNormalEstFromSriOp::calculateNormalsCrossP()
{
    S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);

    m_normalImg.clear();
    
#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
    {
        unsigned char * mask_p = m_maskImg.getScanline ( i ) + halfKsize.width;

        C3DVector *  sln_p = m_normalImg.getScanline ( i ) + halfKsize.width;
            
        for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; 
             ++j, ++sln_p, ++mask_p )
        {
            if ( !*mask_p ) continue;

            C3DVector refpnt = m_localVectorPts.getScanline(i)[j];
                    
            C3DVector plane;
            plane.clear();

            std::vector<C3DVector> points_v;
            for (int ii = -halfKsize.height ; ii <=  halfKsize.height; ++ii)
            {
                C3DVector * sl = m_localVectorPts.getScanline ( i+ii );
                for (int jj = -halfKsize.width ; jj <= halfKsize.width; ++jj)
                    if (ii != 0 && jj != 0)
                        points_v.push_back(sl[j+jj] - refpnt);
            }

            int cnt=0;
            for (unsigned int k = 0; k < points_v.size(); ++k)
            {
                for (unsigned int l = k+1; l < points_v.size(); ++l)
                {
                    C3DVector cp = points_v[k].crossProduct(points_v[l]);
                    if (isfinite(cp.z())) { plane += cp; ++cnt; }
                }
            }

            plane /= cnt; //points_v.size() * (points_v.size()-1)/2;

            *sln_p = plane; 
            sln_p -> normalize();

            if (sln_p -> dotProduct ( refpnt ) > 0 )
                *sln_p = - *sln_p;
        }
    }
}

// void
// CNormalEstFromSriOp::calculateNormals()
// {
//     S2D<int> halfKsize (m_kernelSize.width/2, m_kernelSize.height/2);

//     std::vector<double> cosAz_v     ( m_rangeImg.getWidth(), 0. );
//     std::vector<double> cos2Az_v    ( m_rangeImg.getWidth(), 0. );
//     std::vector<double> sinAz_v     ( m_rangeImg.getWidth(), 0. );
//     std::vector<double> tanAz_v     ( m_rangeImg.getWidth(), 0. );
//     std::vector<double> sinXcosAz_v ( m_rangeImg.getWidth(), 0. );
//     std::vector<C3DMatrix> rotAz_v  ( m_rangeImg.getWidth() );

//     std::vector<double> cosEl_v ( m_rangeImg.getHeight(), 0. );
//     std::vector<double> sinEl_v ( m_rangeImg.getHeight(), 0. );
//     std::vector<double> tanEl_v ( m_rangeImg.getHeight(), 0. );

//     cosAz_v.resize     ( m_rangeImg.getWidth(), 0. );
//     cos2Az_v.resize    ( m_rangeImg.getWidth(), 0. );
//     sinAz_v.resize     ( m_rangeImg.getWidth(), 0. );
//     tanAz_v.resize     ( m_rangeImg.getWidth(), 0. );
//     sinXcosAz_v.resize ( m_rangeImg.getWidth(), 0. );

//     cosEl_v.resize  ( m_rangeImg.getHeight(), 0. );
//     sinEl_v.resize  ( m_rangeImg.getHeight(), 0. );
//     tanEl_v.resize  ( m_rangeImg.getHeight(), 0. );

//     // calculate first cosinus of azimuth and store it in a luv.
//     for (int j = 0 ; j < (int)m_rangeImg.getWidth(); ++j)
//     {
//         double angle_d = m_sri.column2Azimuth ( j );
        
//         cosAz_v[j]     = cos(angle_d);
//         cos2Az_v[j]    = cosAz_v[j] * cosAz_v[j];
//         sinAz_v[j]     = sin(angle_d);
//         tanAz_v[j]     = sinAz_v[j] / cosAz_v[j];
//         sinXcosAz_v[j] = sinAz_v[j] * cosAz_v[j];
//         rotAz_v[j].loadIdentity();
//         rotAz_v[j].rotateY ( angle_d );
//     }   

//     // calculate first cosinus of azimuth and store it in a luv.
//     for (int i = 0 ; i < (int)m_rangeImg.getHeight(); ++i)
//     {
//         double angle_d = m_sri.row2Elevation ( i );
        
//         cosEl_v[i] = cos(angle_d);
//         sinEl_v[i] = sin(angle_d);
//         tanEl_v[i] = sinEl_v[i] / cosEl_v[i];
//     }   

//     //const double scaleAz_d = m_sri.getScale().x;
//     //const double scaleEl_d = m_sri.getScale().y;
    
//     m_normalImg.clear();
    
//     if (0)
//         printf("\ngrep1\n");

// #if defined ( _OPENMP )
//     const unsigned int numThreads_ui = omp_get_max_threads();
// #pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
// #endif
//     for (int i = halfKsize.height ; i < (int)m_rangeImg.getHeight()-halfKsize.height; ++i)
//     {
//         float *      slx_p = m_gradXImg.getScanline ( i ) + halfKsize.width;
//         float *      sly_p = m_gradYImg.getScanline ( i ) + halfKsize.width;
//         float *      slr_p = m_rangeImg.getScanline ( i ) + halfKsize.width;
            
//         unsigned char * mask_p = m_maskImg.getScanline ( i ) + halfKsize.width;

//         C3DVector *  sln_p = m_normalImg.getScanline ( i ) + halfKsize.width;
//         C3DVector *  slp_p = m_localVectorPts.getScanline ( i ) + halfKsize.width;
            
//         double elevation_d = m_sri.row2Elevation  ( i );
        
//         C3DMatrix rotElevation;
//         rotElevation.loadIdentity();
//         rotElevation.rotateX ( elevation_d );

//         //double invCosEl_d = 1./cosEl_v[i];

//         for (int j = halfKsize.width; j < (int)m_rangeImg.getWidth()-halfKsize.width; 
//              ++j, ++slx_p, ++sly_p, ++slr_p, ++sln_p, ++slp_p, ++mask_p )
//         {
//             if ( !*mask_p ) continue;
                
//             double azimuth_d = m_sri.column2Azimuth  ( j );
//             //double sign_d =  cosEl_v[i]>0?1:-1;

//             double &sinAz_d = sinAz_v[j];
//             double &cosAz_d = cosAz_v[j];
//             double &sinEl_d = sinEl_v[i];
//             double &cosEl_d = cosEl_v[i];

//             switch ( m_normalMode_e )
//             {
//                 case NCM_NABLAOP:
//                 {
//                     if (1)
//                     {
//                         /// Del operator
//                         double aux_d = *slr_p * cosEl_d - *sly_p * sinEl_d;
                        
//                         sln_p -> set ( -(*slx_p * cosAz_d + cosEl_d * sinAz_d * aux_d),
//                                        cosEl_d * (*sly_p * cosEl_d + sinEl_d * *slr_p),
//                                        (*slx_p * sinAz_d - cosEl_d * cosAz_d * aux_d ));
                        
//                         //if (sln_p -> dotProduct ( *slp_p ) > 0 )
//                         //    *sln_p = - *sln_p;
                        
//                         if (0) // evaluate deviation.
//                             printf("%f %f %f %f %f grep1\n",
//                                    azimuth_d, elevation_d, 
//                                    sln_p->x(),
//                                    sln_p->y(),
//                                    sln_p->z() );
                        
//                     }
//                     else
//                     {
//                         C3DVector v;
//                         v.set(*slx_p / cosEl_d, *sly_p,  *slr_p );
//                         *sln_p = m_rotMatrix.getScanline(i)[j] * v;
//                         sln_p->at(0) = -sln_p->at(0);
//                         sln_p->at(3) = -sln_p->at(3);
//                     }
                    
//                         sln_p -> normalize();
                        
//                         //if (sln_p -> dotProduct ( points_v[refpnt_i] ) > 0 )
//                         //    *sln_p = - *sln_p;
                        
//                         if (0)
//                         {
//                             printf ("DIFF (%i, %i) (%f, %f, %f)  (az: %f el: %f gradAz: %f gradEl: %f range: %f)\n",
//                                     i,j, 
//                                     sln_p->x(),
//                                     sln_p->y(),
//                                     sln_p->z(), azimuth_d, elevation_d, *slx_p, *sly_p, *slr_p );
//                         }
//                 }
//                 break;
 
//                 case NCM_LS:
//                 {
//                     int refpnt_i =  (int)((m_kernelSize.width * m_kernelSize.height))/2;
//                     C3DVector plane;
                    
//                     std::vector<C3DVector> points_v;
//                     for (int ii = -halfKsize.height ; ii <=  halfKsize.height; ++ii)
//                     {
//                         C3DVector * sl = m_localVectorPts.getScanline ( i+ii );
//                         for (int jj = -halfKsize.width ; jj <= halfKsize.width; ++jj)
//                             points_v.push_back(sl[j+jj]);
//                     }
                    
//                     computeLeastSquarePlane ( points_v, *sln_p );
                    
//                     sln_p -> normalize();

//                     if (sln_p -> dotProduct ( points_v[refpnt_i] ) > 0 )
//                         *sln_p = - *sln_p;
                    
                    
//                     if (0)
//                         printf ("LS   (%i, %i) (%f, %f, %f)\n",
//                                 i,j, 
//                                 sln_p->x(),
//                                 sln_p->y(),
//                                 sln_p->z() );
//                 }
//                 break;

//                 case NCM_FAST_LS:
//                 {
//                     C3DVector vectorB=m_sumLocalVectorsImgs.getScanline(i)[j];
//                     C3DMatrix matrixM=m_sumOuterProductImg.getScanline(i)[j];

//                     /*
//                     vectorB.clear();
//                     matrixM.clear();
                    
//                     for (int ii = -halfKsize.height ; ii <=  halfKsize.height; ++ii)
//                     {
//                         for (int jj = -halfKsize.width ; jj <= halfKsize.width; ++jj)
//                         {
//                             C3DVector &vec = m_localVectorPts.getScanline ( i+ii ) [j+jj];
//                             vectorB -= vec;
//                             matrixM += vec.outerProduct(vec);
//                         }
//                     }

//                     printf("Vec:");
//                     vectorB.print();
                    
//                     printf("Vec precomputed:");
//                     m_sumLocalVectorsImgs.getScanline(i)[j].print();
//                     */

//                     //matrixM.invert();

//                     *sln_p = matrixM * vectorB;
                    
//                     sln_p -> normalize();
                    
//                     if (sln_p -> dotProduct ( *slp_p ) > 0 )
//                         *sln_p = - *sln_p;
//                 }
//                 break;
                
//                 case NCM_INV_LS:
//                 {
//                     C3DVector vectorB = m_sumInvVectorsPts.getScanline(i)[j];
                    
//                     //vectorB.clear();
//                     /*
//                     for (int ii = -halfKsize.height ; ii <=  halfKsize.height; ++ii)
//                     {
//                         for (int jj = -halfKsize.width ; jj <= halfKsize.width; ++jj)
//                             vectorB += m_localInvVectorPts.getScanline ( i+ii ) [j+jj];
//                     }
//                     */
//                     //printf("vectorB:"); vectorB.print();
//                     //printf("m_sumInvVectorsPts"); m_sumInvVectorsPts.getScanline(i)[j].print();                    
                    
//                     *sln_p = m_invM.getScanline(i)[j] * vectorB;
//                     sln_p -> normalize();
                    
//                     //if (sln_p -> dotProduct ( m_localVectorPts.getScanline ( i ) [j] ) > 0 )
//                     //    *sln_p = - *sln_p;
//                 }
//                 break;
                
//                 case NCM_CROSS_PRODUCT:
//                 {
//                     C3DVector refpnt = m_localVectorPts.getScanline(i)[j];
                    
//                     C3DVector plane;
//                     plane.clear();

//                     std::vector<C3DVector> points_v;
//                     for (int ii = -halfKsize.height ; ii <=  halfKsize.height; ++ii)
//                     {
//                         C3DVector * sl = m_localVectorPts.getScanline ( i+ii );
//                         for (int jj = -halfKsize.width ; jj <= halfKsize.width; ++jj)
//                             if (ii != 0 && jj != 0)
//                             points_v.push_back(sl[j+jj] - refpnt);
//                     }

//                     int cnt=0;
//                     for (unsigned int k = 0; k < points_v.size(); ++k)
//                     {
//                         for (unsigned int l = k+1; l < points_v.size(); ++l)
//                         {
//                             C3DVector cp = points_v[k].crossProduct(points_v[l]);
//                             if (isfinite(cp.z())) { plane += cp; ++cnt; }
//                         }
//                     }

//                     plane /= cnt; //points_v.size() * (points_v.size()-1)/2;

//                     *sln_p = plane; 
//                     sln_p -> normalize();

//                     if (sln_p -> dotProduct ( refpnt ) > 0 )
//                         *sln_p = - *sln_p;
//                  }
//                 break;

//                 default:
//                     sln_p -> clear();
//                     break;
//             }

//             if (0)
//             {
//                 C3DVector gtpnt = m_gtNormalImg.getScanline(i)[j];
//                 C3DVector diff  = gtpnt - *sln_p;
//                 printf ("meas (%f, %f, %f) gt  (%f, %f, %f) diff  (%f, %f, %f)\n",
//                         sln_p->x(), sln_p->y(), sln_p->z(), 
//                         gtpnt.x(), gtpnt.y(), gtpnt.z(), 
//                         diff.x(), diff.y(), diff.z() );
                
//             }

//             //*sln_p = rotation * *sln_p;

//             //sln_p -> print();                    
//             //printf("for (%i, %i) gradX = %f gradY = %f range = %f normal = (%f, %f, %f)\n",
//             //       i, j, *slx_p / 8.f, *sly_p / 8.f, *slr_p, sln_p->x(), sln_p->y(), sln_p->z() );
//         }

//         //printf("For %i %i normal is %f %f %f\n",
//         //       j, i, sln_p->x(),sln_p->y(),sln_p->z());            
//     }
// }

void
CNormalEstFromSriOp::calculateError( SError_t &errorData )
{
    C3DVector *  sln_p     = m_normalImg.getData();
    C3DVector *  gtn_p     = m_gtNormalImg.getData();
    double *     err_p     = m_errorImg.getData();
    unsigned char * mask_p = m_maskImg.getData();

    //float *      slr_p = m_rangeImg.getScanline ( i )  + halfKsize.width;
    
    unsigned int size_ui = ( m_localVectorPts.getWidth() * 
                             m_localVectorPts.getHeight() );
    
    m_errorImg.clear();

    //double mse_d = 0;
    //double sumdiffs_d = 0;
    //unsigned int count_ui = 0;
    
    for ( unsigned int i = 0 ; i < size_ui; ++i, ++sln_p, ++gtn_p, ++err_p, ++mask_p )
    {
        if ( *mask_p && 
             isnormal(sln_p->x()) && 
             isnormal(sln_p->y()) && 
             isnormal(sln_p->z()) &&
             isnormal(gtn_p->x()) && 
             isnormal(gtn_p->y()) && 
             isnormal(gtn_p->z())   )
        {
            double dotp_d = *gtn_p * *sln_p;
            C3DVector diff = *gtn_p - *sln_p;
            errorData.sumdiffs_d += diff.magnitude();

            if (dotp_d <= -1 || dotp_d >=1) //fabs(dotp_d - 1.) < 1.e-4 || fabs(dotp_d + 1.) < -1.e-4)
                   *err_p = 0;
//             else if ( !(dotp_d < 1.0001 && dotp_d <= 1 ) || !isnormal(dotp_d))
//             {
//                 printf("grep16 does this happen? (%40.30f) (%40.30f) (%40.30f)\n",
//                        dotp_d,
//                        dotp_d -1, 
//                        dotp_d + 1);
//                 gtn_p->print();
//                 sln_p->print();
//                 //getchar();
//                 continue;
//             }
            else
                *err_p = acos(dotp_d);

            errorData.mse_d += *err_p;
            ++errorData.count_ui;
        }
        else
        {
            //printf("sln:");
            //sln_p->print();
            //gtn_p->print();
            
        }
        
    }
}

void
CNormalEstFromSriOp::computeLeastSquarePlane ( const std::vector<C3DVector> & f_points_v,
                                              C3DVector &                    fr_plane )
{
    long double sumxx=0., sumyy=0., sumzz=0., sumxy=0., sumxz=0., sumyz=0.;
    
    C3DVector vectorSum;
    vectorSum.clear();
    
    int n = f_points_v.size();
    
    for (int i = 0; i < n; ++i)
    {
        vectorSum += f_points_v[i];
        sumxx     += f_points_v[i].x() * f_points_v[i].x();
        sumyy     += f_points_v[i].y() * f_points_v[i].y();
        sumzz     += f_points_v[i].z() * f_points_v[i].z();
        sumxy     += f_points_v[i].x() * f_points_v[i].y();
        sumxz     += f_points_v[i].x() * f_points_v[i].z();
        sumyz     += f_points_v[i].y() * f_points_v[i].z();
    }

    C3DMatrix A;
    C3DVector vectorAvg = vectorSum / n;
    
    A.at(0,0) = (double)(sumxx - vectorAvg.x() * vectorSum.x());
    A.at(1,1) = (double)(sumyy - vectorAvg.y() * vectorSum.y());
    A.at(2,2) = (double)(sumzz - vectorAvg.z() * vectorSum.z());
    A.at(0,1) = A.at(1,0) = (double)(sumxy - vectorAvg.y() * vectorSum.x() );
    A.at(0,2) = A.at(2,0) = (double)(sumxz - vectorAvg.z() * vectorSum.x() );
    A.at(1,2) = A.at(2,1) = (double)(sumyz - vectorAvg.z() * vectorSum.y() );
    
    //A.print();
    //testConditioning(A);

    if (1)
    {
        double buffer_p[16]; 
        C3DVector eigenValues;
        C3DMatrix eigenVectors;
        
        IppStatus res = 
            ippmEigenValuesVectorsSym_m_64f ( &A.at(0,0), 
                                              3 * sizeof(double), sizeof(double),
                                              buffer_p, 
                                              &eigenVectors.at(0,0), 
                                              3 * sizeof(double), sizeof(double),
                                              &eigenValues.at(0), 
                                              3 );

        if (res!=ippStsNoErr ) {fr_plane.set(0,0,0); return; }
        
        int idx;
        
        if ( eigenValues.x() < eigenValues.y() ) 
        {
            if (eigenValues.x() <  eigenValues.z() ) 
                idx = 0;
            else
                idx = 2;
        }
        else
            if ( eigenValues.y() < eigenValues.z() ) 
                idx = 1;
            else
                idx = 2;

        fr_plane.set( eigenVectors.at(0, idx), 
                      eigenVectors.at(1, idx), 
                      eigenVectors.at(2, idx) );

        //printf("Eigenvalues: ");
        //eigenValues.print();
        //printf("Eigenvectors: ");
        //eigenVectors.print();
        
    }
    else
    {
        double m = A.trace()/3;

        C3DMatrix I;
        I.loadIdentity();

        C3DMatrix K = A - I * m;

        double q = K.determinant()/2;
 
        double p = 0;
    
        for ( int i = 0; i < 9; ++i)
            p += K.at(i) * K.at(i);

        p = p/6;
    
        double phi = 1./3. * atan2( sqrt ( p*p*p - q*q ), q );

        if ( phi < 0 )
            phi += M_PI/3.;
        
        double cosphi_d = cos(phi);
        double sinphi_d = sin(phi);
        double sqrt3_d  = sqrt(3);
        double sqrtp_d  = sqrt(p);

        double eig1 = m + 2 * sqrtp_d * cosphi_d;
        double eig2 = m - sqrtp_d * ( cosphi_d + sqrt3_d * sinphi_d );
        double eig3 = m - sqrtp_d * ( cosphi_d - sqrt3_d * sinphi_d );

        double eigv = std::min(std::min(eig1, eig2), eig3);
    
        A.at(0,0) -= eigv;
        A.at(1,1) -= eigv;
        A.at(2,2) -= eigv;
    
        for (int i = 0 ; i < 3; ++i)
        {
            double scale_d = A.at(i,i);
            for (int j = 0; j < 3; ++j)
                A.at(i,j) /= scale_d;

            for (int k = i+1; k < 3; ++k)
            {
                if (k != i)
                {
                    double val_d = A.at(k, i);

                    for (int j = i; j < 3; ++j)
                        A.at(k,j) -= val_d * A.at(i,j);
                }
            }
        }

        fr_plane.set( -A.at(0,1) * A.at(1,2) + A.at(0,2),
                      A.at(1,2),
                      -1.);
    }
}

#if 0

// inline void
// CNormalEstFromSriOp::computeLeastSquarePlane ( const std::vector<C3DVector> & f_points_v,
//                                               C3DVector &                    fr_plane )
// {
//     /*
//     fr_plane.set( random()/(double)RAND_MAX,
//                   random()/(double)RAND_MAX,
//                   random()/(double)RAND_MAX );
//     return;
//     */  

//     double sumxx=0., sumyy=0., sumzz=0., sumxy=0., sumxz=0., sumyz=0.;
        
//     C3DVector vectorSum;
//     vectorSum.clear();
    
//     int n = f_points_v.size();

//     for (int i = 0; i < n; ++i)
//     {
//         vectorSum += f_points_v[i];
//         sumxx     += f_points_v[i].x() * f_points_v[i].x();
//         sumyy     += f_points_v[i].y() * f_points_v[i].y();
//         sumzz     += f_points_v[i].z() * f_points_v[i].z();
//         sumxy     += f_points_v[i].x() * f_points_v[i].y();
//         sumxz     += f_points_v[i].x() * f_points_v[i].z();
//         sumyz     += f_points_v[i].y() * f_points_v[i].z();
//     }

//     C3DMatrix M;
//     C3DVector vectorAvg = vectorSum / n;

//     M.at(0,0) = sumxx - vectorAvg.x() * vectorSum.x();
//     M.at(1,1) = sumyy - vectorAvg.y() * vectorSum.y();
//     M.at(2,2) = sumzz - vectorAvg.z() * vectorSum.z();
//     M.at(0,1) = M.at(1,0) = ( sumxy - vectorAvg.y() * vectorSum.x() );
//     M.at(0,2) = M.at(2,0) = ( sumxz - vectorAvg.z() * vectorSum.x() );
//     M.at(1,2) = M.at(2,1) = ( sumyz - vectorAvg.z() * vectorSum.y() );

//     //printf("matrix M is"); M.print();
//     //testConditioning(M);
    

//     if (0)
//     {
//         double buffer_p[16]; 
//         C3DVector eigenValues;
//         C3DMatrix eigenVectors;
        
//         IppStatus res = 
//             ippmEigenValuesVectorsSym_m_64f ( &M.at(0,0), 
//                                               3 * sizeof(double), sizeof(double),
//                                               buffer_p, 
//                                               &eigenVectors.at(0,0), 
//                                               3 * sizeof(double), sizeof(double),
//                                               &eigenValues.at(0), 
//                                               3 );

//         if (res!=ippStsNoErr ) {fr_plane.set(0,0,0); return; }
        
//         int idx;
        
//         if ( eigenValues.x() < eigenValues.y() ) 
//         {
//             if (eigenValues.x() <  eigenValues.z() ) 
//                 idx = 0;
//             else
//                 idx = 2;
//         }
//         else
//             if ( eigenValues.y() < eigenValues.z() ) 
//                 idx = 1;
//             else
//                 idx = 2;

//         fr_plane.set( eigenVectors.at(0, idx), 
//                       eigenVectors.at(1, idx), 
//                       eigenVectors.at(2, idx) );

//         //printf("Eigenvalues: ");
//         //eigenValues.print();
//         //printf("Eigenvectors: ");
//         //eigenVectors.print();
        
//     }
//     else
//     {
//         static const double sqrt3_d = sqrt(3);
    
//         double m = M.trace()/3;

//         C3DMatrix I;
//         I.loadIdentity();

//         C3DMatrix K = M - (I * m);

//         double q = K.determinant()/2;
 
//         double p = 0;
    
//         for ( int i = 0; i < 9; ++i)
//             p += K.at(i) * K.at(i);

//         p = p/6;
    
//         double phi = 1./3. * atan( sqrt ( p*p*p - q*q ) / q );

//         if ( phi < 0 )
//             phi += M_PI/3;
        

//         double eig1 = m + 2 * sqrt(p) * cos(phi);
//         double eig2 = m - sqrt(p) * ( cos(phi) + sqrt3_d * sin(phi) );
//         double eig3 = m - sqrt(p) * ( cos(phi) - sqrt3_d * sin(phi) );

//         double eigv;
        
//         eigv = std::min(std::min(eig1, eig2), eig3);

//         //double eigvMax;
//         //eigvMax = std::max(std::max(eig1, eig2), eig3);
//         //printf("max to min eigenvalue is %f\n", eigvMax/eigv);

//         //if (fabs(eigv) < 1.e-5) eigv = 0;

//         //printf("%f %f %f dist %f\n", eig1, eig2, eig3, f_points_v[f_points_v.size()/2].z());

//         M.at(0,0) -= eigv;
//         M.at(1,1) -= eigv;
//         M.at(2,2) -= eigv;

//         for (int i = 0 ; i < 3; ++i)
//         {
//             double scale_d = M.at(i,i);
//             for (int j = 0; j < 3; ++j)
//                 M.at(i,j) /= scale_d;

//             for (int k = i+1; k < 3; ++k)
//             {
//                 if (k != i)
//                 {
//                     double val_d = M.at(k, i);

//                     for (int j = i; j < 3; ++j)
//                         M.at(k,j) -= val_d * M.at(i,j);
//                 }
//             }
//         }

//         fr_plane.set( -M.at(0,1) * M.at(1,2) + M.at(0,2),
//                       M.at(1,2),
//                       -1.);

//         //printf("Eigenvector: %f %f %f\n", fr_plane.x(), fr_plane.y(), fr_plane.z());
//     }
    
// }

#endif

void
CNormalEstFromSriOp::testConditioning(const C3DMatrix &f_matrix )
{
    C3DMatrix invm = f_matrix;
    invm.invert();
    
    C3DVector inf1, inf2;
    inf1.clear();
    inf2.clear();
    
    for (int i = 0; i < 3; ++i)
    {
        for (int j = 0; j < 3; ++j)
        {
            inf1.at(i) += fabs(f_matrix.at(i, j));
            inf2.at(i) += fabs(invm.at(i, j));
        }
    }
    
    double min1_d = std::max(std::max(inf1.x(), inf1.y()), inf1.z() );
    double min2_d = std::max(std::max(inf2.x(), inf2.y()), inf2.z() );

    printf("conditioning of matrix is %f\n", min1_d * min2_d);
        
}



void
CNormalEstFromSriOp::allocateImages ( )
{
    S2D<unsigned int> size = m_sri.getImage().getSize();

    printf("New SRI size is %ix%i\n", 
           size.width, size.height );

    printf("POINTER IS %p\n", 
           m_rangeImg.getData() );

    if ( size != m_rangeImg.getSize() )
    {
        m_bufferImg.freeMemory();
        m_bufferImg.setSize( size );
        m_bufferImg.ensureAllocation();
        
        m_gradYImg.freeMemory();
        m_gradYImg.setSize( size );
        m_gradYImg.ensureAllocation();
        
        m_gradXImg.freeMemory();
        m_gradXImg.setSize( size );
        m_gradXImg.ensureAllocation();
        
        m_gtNormalImg.freeMemory();
        m_gtNormalImg.setSize ( size );
        m_gtNormalImg.ensureAllocation();        

        m_normalImg.freeMemory();
        m_normalImg.setSize ( size );
        m_normalImg.ensureAllocation();        

        m_localVectorPts.freeMemory();
        m_localVectorPts.setSize ( size );
        m_localVectorPts.ensureAllocation();

        m_errorImg.freeMemory();
        m_errorImg.setSize ( size );
        m_errorImg.ensureAllocation();

        m_rangeImg.freeMemory();
        m_rangeImg.setSize( size );
        m_rangeImg.ensureAllocation();

        m_invM.freeMemory();
        m_invM.setSize( size );
        m_invM.ensureAllocation();

        m_localInvVectorPts.freeMemory();
        m_localInvVectorPts.setSize ( size );
        m_localInvVectorPts.ensureAllocation();

        m_sumInvVectorsPts.freeMemory();
        m_sumInvVectorsPts.setSize ( size );
        m_sumInvVectorsPts.ensureAllocation();

        m_maskImg.freeMemory();
        m_maskImg.setSize( size );
        m_maskImg.ensureAllocation();

        m_outerProductImg.freeMemory();
        m_outerProductImg.setSize( size );
        m_outerProductImg.ensureAllocation();
        
        m_sumOuterProductImg.freeMemory();
        m_sumOuterProductImg.setSize( size );
        m_sumOuterProductImg.ensureAllocation();
        
        m_sumLocalVectorsImgs.freeMemory();
        m_sumLocalVectorsImgs.setSize( size );
        m_sumLocalVectorsImgs.ensureAllocation();

        m_rotMatrix.freeMemory();
        m_rotMatrix.setSize( size );
        m_rotMatrix.ensureAllocation();
    }
}


/// Show event.
bool CNormalEstFromSriOp::show()
{
    CDrawingList *  list1_p; //, * list2_p;

    list1_p = getDrawingList ("Mask Image");
    list1_p -> clear();

    if ( list1_p -> isVisible())
        list1_p -> addImage ( m_maskImg,
                              0, 0, 
                              800, 600, 
                              255.f );

    list1_p = getDrawingList ("Angular Error Image");
    list1_p -> clear();

    if ( list1_p -> isVisible())
        list1_p -> addColorEncImage ( &m_errorImg,
                                      0, 0, 
                                      800, 600, 
                                      m_colorEncNormalErr, 
                                      1.f,
                                      false );

    list1_p = getDrawingList ("Color Enc SRI");
    list1_p -> clear();

    if ( list1_p -> isVisible())
        list1_p -> addColorEncImage ( &m_sri.getImage(),
                                      0, 0, 
                                      800, 600, 
                                      m_colorEncSri, 
                                      1.f,
                                      false );

    list1_p = getDrawingList ("Vertical Gradient");
    list1_p -> clear();

    if ( list1_p -> isVisible() )
        list1_p -> addColorEncImage ( &m_gradYImg,
                                      0, 0, 
                                      800, 600, 
                                      m_colorEncGrad, 
                                      1.f,
                                      false );

    list1_p = getDrawingList ("Horizontal Gradient");
    list1_p -> clear();

    if ( list1_p -> isVisible() )
        list1_p -> addColorEncImage ( &m_gradXImg,
                                      0, 0, 
                                      800, 600, 
                                      m_colorEncGrad, 
                                      1.f,
                                      false );
    
    list1_p = getDrawingList ("Range/Depth Image");
    list1_p -> clear();

    CColorEncoding * colorEnc_p =
        dynamic_cast<CColorEncoding *>(getInput ( "SRI - Color encoding" ) );

    if ( list1_p -> isVisible() && colorEnc_p )
        list1_p -> addColorEncImage ( &m_rangeImg,
                                      0, 0, 
                                      800, 600, 
                                      *colorEnc_p, 
                                      1.f,
                                      false );

#if defined HAVE_QGLVIEWER
    show3D();
#endif

    return COperator::show();
}

void
CNormalEstFromSriOp::show3D()
{
#if defined HAVE_QGLVIEWER
    
    printf("SHOWING 3D NORMALS %i %i\n", (m_3dViewer_p != 0), m_show3DNormals_b );
    if (m_3dViewer_p && m_show3DNormals_b)
    {
        //m_3dViewer_p->clear();
        
        const C3DVector *  sln_p   = m_normalImg.getData();
        const double *     err_p   = m_errorImg.getData();
        const double *     range_p = m_sri.getImage().getData();

        //float *      slr_p = m_rangeImg.getScanline ( i )  + halfKsize.width;
        
        unsigned int size_ui = ( m_localVectorPts.getWidth() * 
                                 m_localVectorPts.getHeight() );
        
        C3DVector *  slw_p         = m_localVectorPts.getScanline (0);

        m_3dViewer_p -> setBackgroundColor ( m_bgColor );
        m_3dViewer_p -> setLineWidth ( m_lineWidth_f );
        m_3dViewer_p -> setPointSize ( m_pointSize_f );
        

        SRgb color;

        for (unsigned int i = 0 ; i < size_ui; ++i, ++slw_p, ++sln_p, ++err_p, ++range_p)
        {
            if ( *sln_p )
            {
                //printf("%i %i %f\n", i/m_localVectorPts.getWidth(),
                //       i%m_localVectorPts.getWidth(), *err_p );
                
                if (m_encodeErrorIn3DVis_b)
                    m_colorEncNormalErr.colorFromValue ( *err_p,
                                                         color );
                else
                    m_colorEncSri .colorFromValue ( *range_p,
                                                    color );
                
                C3DVector vec = *sln_p * m_vectorLength_d;
                
                m_3dViewer_p -> addPoint ( *slw_p, color, vec );
            }
        }
    }
    
#endif
}



void 
CNormalEstFromSriOp::keyPressed ( CKeyEvent * f_event_p )
{
   return COperator::keyPressed ( f_event_p );    
}

void 
CNormalEstFromSriOp::mouseMoved ( CMouseEvent * f_event_p )
{
    const CDrawingList *  list1_p =  getDrawingList ("Horizontal Gradient");
    const CDrawingList *  list2_p =  getDrawingList ("Vertical Gradient");

    int dispWidth_i  = 800;
    int dispHeight_i = 600;
    
    if ( ( f_event_p -> displayScreen == list1_p->getPosition() &&
           list1_p -> isVisible() ) || 
         ( f_event_p -> displayScreen == list2_p->getPosition() &&
           list2_p -> isVisible() ) )
    {
        const int w_ui = m_gradXImg.getWidth();
        const int h_ui = m_gradXImg.getHeight();

        int x_i = (int)(f_event_p -> posInScreen.x / dispWidth_i  * w_ui + .5);
        int y_i = (int)(f_event_p -> posInScreen.y / dispHeight_i * h_ui + .5);
        
        if ( x_i >=0 && x_i < w_ui && 
             y_i >=0 && y_i < h_ui )
        {
            printf("(v,u) (%i, %i) El: %f rad (%f deg) Az: %f (%f deg)Range: %f Dx: %f Dy: %f\n",
                   y_i, x_i, 
                   m_sri.row2Elevation ( y_i ),
                   m_sri.row2Elevation ( y_i ) * 180 / M_PI,
                   m_sri.column2Azimuth ( x_i ),
                   m_sri.column2Azimuth ( x_i ) * 180 / M_PI,
                   m_sri.getImage().getScanline(y_i)[x_i],
                   m_gradXImg.getScanline(y_i)[x_i],
                   m_gradYImg.getScanline(y_i)[x_i] );

        }
    }

    return COperator::mouseMoved ( f_event_p );
}

