/*@@@**************************************************************************
 * \file  kmStereoOp
 * \author Hernan Badino
 * \date  Wed Apr  8 14:27:15 GMT 2009
 * \notes 
 *******************************************************************************
 ******************************************************************************/

/* INCLUDES */
#include "kmStereoOp.h"
#include "logger.h"
#include "drawingList.h"
#include "stereoCamera.h"
#include "colorEncoding.h"
#include "uShortImage.h"
#include "medianFilter.h"
#include "3DMatrix.h"

//#include "h/kmStereo.h"
#include <sys/time.h>


#include "h/dynProgOp.h"
#include "h/pyrDynProg.h"

#include "gaussianPyramid.h"


using namespace VIC;

/// Constructor.
///
/// Initializes every member with its default value, registers the drawing
/// lists this operator draws into and publishes all tunable members as
/// parameters, grouped into "Dynamic Programming Params", "Stereo
/// Algorithms", "Dynamic Programming" and "Display".
///
/// \param f_parent_p  Parent operator, forwarded to the COperator base class.
CKmStereoOp::CKmStereoOp ( COperator * const f_parent_p )
        : COperator (       f_parent_p, "KmStereo" ),

          // Disparity space images (raw and optimized).
          m_dsi (            CDsiOpFloat_t::CT_DUV ),
          m_optDsi (         CDsiOpFloat_t::CT_DUV ),

          m_leftRes (                              ),

          // Cost aggregation defaults.
          m_maxDisparity_i (                   100 ),
          m_zssdKernelWidth_ui (                 3 ),
          m_zssdKernelHeight_ui (                3 ),
          m_normalizeZssd_b (                 true ),

          // Dynamic programming defaults.
          m_maxLevel_ui (                        2 ),
          m_distCost_f (                     0.05f ),
          m_distThreshold_f (                  0.f ),
          m_predCost_f (                       0.f ),
          m_predThreshold_f (                  0.f ),
          m_predCost4Prop_f (                  0.f ),
          m_predThreshold4Prop_f (             0.f ),
          m_initialCost_f (                    0.f ),
          m_minCostValue_f (                   0.f ),
          m_maxCostValue_f (                 1.e9f ),
          m_applyMedian_b (                  false ),
          m_useGradientPred_b (              false ),
          m_mfKernelSize_ui (                    7 ),
          m_followPathPrevRow_b (            false ),
          m_predPathPrevRow_b (              false ),
          m_pathTolerance_ui (                   5 ),
          m_deltaDisp_ui (                       1 ),

          // Algorithm selection flags and their result images.
          m_computeWTA_b (                   false ),
          m_dispImgWTA (                           ),
          m_computeWTAwithBounds_b (         false ),
          m_dispImgWTAwithBounds (                 ),
          m_computeDP_b (                    false ),
          m_computeDPwithPred_b (            false ),
          m_computeVDP_b (                   false ),
          m_dispImgDP (                            ),
          m_dispImgDPwithPred (                    ),
          m_dispImgVDP (                           ),
          m_computeDPwithBounds_b (          false ),
          m_dispImgDPwithBounds (                  ),
          m_computePyrDP_b (                 false ),
          m_dispImgPyrDP (                         ),
          m_computePyrDPwithBounds_b (       false ),
          m_dispImgPyrDPwithBounds (               ),
          m_auxDispMap (                           ),                  
          m_updateOptDsi_b (                 false ),
          m_stereoOp (                 320, 240, 2 ),
          // Thread count: capped at 32 with OpenMP, a single thread otherwise.
#if defined ( _OPENMP )
          m_numThreads_ci ( 
                  std::min(omp_get_max_threads(), 32) ),
#else 
          m_numThreads_ci (                      1 ),
#endif
          // DSI slice display.
          m_dsiSliceScale_d (                   1. ),
          m_dsiSliceOffset_d (                  0. ),
          m_showOptDsi_b (                   false ),

          // Disparity map refinement.
          m_refineDispMap_b (                 true ),
          m_preApplyMedianFilter_b (         false ),
          m_postApplyMedianFilter_b (         true ),
          m_medianFilterMode_e (           MFM_3x3 ),
          m_checkMinDsi_b (                   true ),
          m_useOptDsi_b (                    false ),
          m_maxDispDiff_f (                   1.0f ),
          m_useRRI_b (                       false ),

          // Residuum and 3D display.
          m_computeResiduum_b (              false ),
          m_dispImgResiduum (                      ),
          m_displayResiduumNorm_f (              5 ),
          m_vectorImg (                            ),
          m_show3DPoints_b (                 false ),
          m_show3DMesh_b (                   false ),
          m_maxDist4Mesh_f (                   0.5 ),
          m_maxDisp4Mesh_f (                     3 )
{
    // Register the drawing lists. The second argument is passed as an
    // S2D<int> pair and the third as "false" for every list.
    // NOTE(review): presumably screen grid position and initial
    // visibility -- confirm against registerDrawingList().
    registerDrawingList ( "KM Stereo Disparity Image",
                          S2D<int> (2, 2),
                          false );

    registerDrawingList ( "Stereo Bird View",
                          S2D<int> (0, 2),
                          false );
    
    registerDrawingList ( "WTA Raw",
                          S2D<int>(2, 0),
                          false );

    registerDrawingList ( "WTA w/ Bounds",
                          S2D<int>(0, 1),
                          false );

    registerDrawingList ( "Dynamic Programming",
                          S2D<int>(2, 1),
                          false );

    registerDrawingList ( "Dynamic Programming with Pred",
                          S2D<int>(2, 1),
                          false );

    registerDrawingList ( "Vertical Dynamic Programming",
                          S2D<int>(0, 2),
                          false );

    registerDrawingList ( "Dynamic Programming w/ Bounds",
                          S2D<int>(2, 2),
                          false );

    registerDrawingList ( "Pyramidal Dynamic Programming",
                          S2D<int>(2, 1),
                          false );

    registerDrawingList ( "Pyramidal Dynamic Programming w/ Bounds",
                          S2D<int>(2, 2),
                          false );

    registerDrawingList ( "DSI Slice",
                          S2D<int>(1, 0),
                          false );

    registerDrawingList ( "DSI Slice Row Overlay",
                          S2D<int>(0, 0),
                          false );

    registerDrawingList ( "Residuum",
                          S2D<int>(0, 0),
                          false );

    /*
      CDrawingList *  list_p;
      list_p = getDrawingList ("Rectified Left Image");
      list_p -> setPosition ( S2D<int> (0, 1) );
      list_p -> setVisibility ( true );

      list_p = getDrawingList ("Rectified Right Image");
      list_p -> setPosition ( S2D<int> (1, 1) );
      list_p -> setVisibility ( true );
    */

    // ---------------------------------------------------------------------
    // Cost aggregation (ZSSD) parameters.
    // ---------------------------------------------------------------------
    BEGIN_PARAMETER_GROUP("Dynamic Programming Params", false, CColor::red );

    ADD_UINT_PARAMETER ( "ZSSD Kernel Width",
                         "Width of the aggregation window [px].",
                         m_zssdKernelWidth_ui,
                         this,
                         ZssdKernelWidth,
                         CKmStereoOp );

    ADD_UINT_PARAMETER ( "ZSSD Kernel Height",
                         "Height of the aggregation window [px].",
                         m_zssdKernelHeight_ui,
                         this,
                         ZssdKernelHeight,
                         CKmStereoOp );

    ADD_BOOL_PARAMETER ( "Normalize ZSSD",
                         "Normalize the resulting zssd of the aggregation window?",
                         m_normalizeZssd_b,
                         this,
                         NormalizeZssd,
                         CKmStereoOp );

    END_PARAMETER_GROUP;


    // ---------------------------------------------------------------------
    // Selection of the stereo algorithms to run and refinement options.
    // ---------------------------------------------------------------------
    BEGIN_PARAMETER_GROUP("Stereo Algorithms", false, CColor::red );
    ADD_BOOL_PARAMETER ( "Raw WTA",
                         "Compute stereo with raw WTA.",
                         m_computeWTA_b,
                         this,
                         ComputeWTA,
                         CKmStereoOp );

    // NOTE(review): the accessor token below reads "ComputeWAwithBounds"
    // (no 'T'). It is left untouched because it presumably has to match
    // the accessor names declared in the header -- confirm there.
    ADD_BOOL_PARAMETER ( "WTA with bounds",
                         "Compute stereo with WTA with bounds.",
                         m_computeWTAwithBounds_b,
                         this,
                         ComputeWAwithBounds,
                         CKmStereoOp );
      
    ADD_BOOL_PARAMETER ( "Raw Dynamic Programming",
                         "Compute stereo with DP.",
                         m_computeDP_b,
                         this,
                         ComputeDP,
                         CKmStereoOp );
      
    ADD_BOOL_PARAMETER ( "Raw DP with Prediction",
                         "Compute stereo with DP and prediction.",
                         m_computeDPwithPred_b,
                         this,
                         ComputeDPwithPred,
                         CKmStereoOp );
 
    ADD_BOOL_PARAMETER ( "Raw Vertical DP",
                         "Compute stereo with vertical DP.",
                         m_computeVDP_b,
                         this,
                         ComputeVDP,
                         CKmStereoOp );

    ADD_BOOL_PARAMETER ( "DP with bounds",
                         "Compute stereo with DP and bounds.",
                         m_computeDPwithBounds_b,
                         this,
                         ComputeDPwithBounds,
                         CKmStereoOp );
      
    ADD_BOOL_PARAMETER ( "Pyramidal DP",
                         "Compute stereo with pyramidal dynamic programming.",
                         m_computePyrDP_b,
                         this,
                         ComputePyrDP,
                         CKmStereoOp );
      
    ADD_BOOL_PARAMETER ( "Pyramidal DP with bounds",
                         "Compute stereo with pyramidal dynamic programming and bounds.",
                         m_computePyrDPwithBounds_b,
                         this,
                         ComputePyrDPwithBounds,
                         CKmStereoOp );
      
    ADD_BOOL_PARAMETER ( "Update optimized DSI",
                         "Update the optimized dsi.",
                         m_updateOptDsi_b,
                         this,
                         UpdateOptDsi,
                         CKmStereoOp );

    ADD_BOOL_PARAMETER ( "Use Remapped Right Image",
                         "Use not the original but a "
                         "remapped right image (if available).",
                         m_useRRI_b,
                         this,
                         UseRRI,
                         CKmStereoOp );

    ADD_BOOL_PARAMETER ( "Compute Residuum",
                         "Compute the difference between prediction and obtained disparity image.",
                         m_computeResiduum_b,
                         this,
                         ComputeResiduum,
                         CKmStereoOp );

    ADD_LINE_SEPARATOR;
    
    ADD_BOOL_PARAMETER ( "Refine Disparity Map",
                         "Refine disparity map by outliers elimination and SPG computation.",
                         m_refineDispMap_b,
                         this,
                         RefineDispMap,
                         CKmStereoOp );

    ADD_BOOL_PARAMETER ( "Pre-apply MF",
                         "Pre-apply MF before computing SPG",
                         m_preApplyMedianFilter_b,
                         this,
                         PreApplyMedianFilter,
                         CKmStereoOp );

    ADD_BOOL_PARAMETER ( "Post-apply MF",
                         "Post-apply MF after having computed SPG",
                         m_postApplyMedianFilter_b,
                         this,
                         PostApplyMedianFilter,
                         CKmStereoOp );

    // The enum parameter is kept so that a readable label can be attached
    // to each selectable median filter mode.
    CEnumParameter<EMedianFilterMode_t> * mfMode_p = static_cast<CEnumParameter<EMedianFilterMode_t> * > (
            ADD_ENUM_PARAMETER( "MF Kernel Size",
                                "Size of the kernel for MF computation.",
                                EMedianFilterMode_t,
                                m_medianFilterMode_e,
                                this,
                                MedianFilterMode,
                                CKmStereoOp ) );

    mfMode_p-> addDescription ( MFM_3x3, "3x3" );
    mfMode_p-> addDescription ( MFM_5x5, "5x5" );


    ADD_BOOL_PARAMETER ( "Check 4 min in DSI",
                         "Check if disparity is local minimum in DSI space.",
                         m_checkMinDsi_b,
                         this,
                         CheckMinDsi,
                         CKmStereoOp );

    ADD_BOOL_PARAMETER ( "Use Optimized DSI?",
                         "Use Optimized DSI or standard DSI for checking local minima.",
                         m_useOptDsi_b,
                         this,
                         UseOptDsi,
                         CKmStereoOp );

    ADD_FLOAT_PARAMETER ( "Disp diff for LR-RL",
                          "Max Disp difference in LR-RL consistency check.",
                          m_maxDispDiff_f,
                          this,
                          MaxDispDiff,
                          CKmStereoOp );


    END_PARAMETER_GROUP;

    // ---------------------------------------------------------------------
    // Dynamic programming costs, thresholds and path constraints.
    // ---------------------------------------------------------------------
    BEGIN_PARAMETER_GROUP("Dynamic Programming", false, CColor::red );

    ADD_UINT_PARAMETER ( "Max Pyramid Level",
                         "0 is the first level (original size)",
                         m_maxLevel_ui,
                         this,
                         MaxLevel,
                         CKmStereoOp );


    ADD_FLOAT_PARAMETER ( "Distance Cost",
                          "Cost of a jump in disparity [cost/px].",
                          m_distCost_f,
                          this,
                          DistCost,
                          CKmStereoOp );
    
    ADD_FLOAT_PARAMETER ( "Max Cost Distance",
                          "The cost will be linear for every jump in disparity "
                          "up to a maximum distance [px].",
                          m_distThreshold_f,
                          this,
                          DistThreshold,
                          CKmStereoOp );
    
    ADD_FLOAT_PARAMETER ( "Prediction cost",
                          "Cost of the deviation with a prediction [cost/px]",
                          m_predCost_f,
                          this,
                          PredCost,
                          CKmStereoOp );
    
    ADD_FLOAT_PARAMETER ( "Max Prediction Distance",
                          "The prediction cost will be linear for every jump in "
                          "disparity up to a maximum distance [px].",
                          m_predThreshold_f,
                          this,
                          PredThreshold,
                          CKmStereoOp );
    
    ADD_FLOAT_PARAMETER ( "Prediction cost 4 Prop",
                          "Cost of the deviation with a prediction of the previous result [cost/px]",
                          m_predCost4Prop_f,
                          this,
                          PredCost4Prop,
                          CKmStereoOp );
    
    ADD_FLOAT_PARAMETER ( "Max Prediction Distance 4 Prop",
                          "The prediction cost will be linear for every jump in "
                          "disparity up to a maximum distance [px].",
                          m_predThreshold4Prop_f,
                          this,
                          PredThreshold4Prop,
                          CKmStereoOp );

    ADD_FLOAT_PARAMETER ( "Initial cost",
                          "Negative cost to benefit low disparities [cost].",
                          m_initialCost_f,
                          this,
                          InitialCost,
                          CKmStereoOp );
    
    ADD_FLOAT_PARAMETER ( "Min Cost",
                          "Values smaller than this cost will not be considered in the "
                          "search of the best path [cost].",
                          m_minCostValue_f,
                          this,
                          MinCostValue,
                          CKmStereoOp );

    ADD_FLOAT_PARAMETER ( "Max Cost",
                          "Values larger than this cost will not be considered in the "
                          "search of the best path [cost].",
                          m_maxCostValue_f,
                          this,
                          MaxCostValue,
                          CKmStereoOp );
    
    ADD_BOOL_PARAMETER (  "Apply median filter to the result?",
                          "The resulting path will be median filtered.",
                          m_applyMedian_b,
                          this,
                          ApplyMedian,
                          CKmStereoOp );

    ADD_UINT_PARAMETER (  "MF kernel size",
                          "Size of the kernel for MF [px]",
                          m_mfKernelSize_ui,     
                          this,
                          MfKernelSize,
                          CKmStereoOp );

    ADD_BOOL_PARAMETER (  "Use gradient prediction?",
                          "Use the gradient prediction obtained from the normals.",
                          m_useGradientPred_b,
                          this,
                          UseGradientPred,
                          CKmStereoOp );

    ADD_BOOL_PARAMETER (  "Predict Path from Previous row?",
                          "The result of the previous row is used as a "
                          "prediction for the current one.",
                          m_predPathPrevRow_b,
                          this,
                          PredPathPrevRow,
                          CKmStereoOp );
        

    ADD_BOOL_PARAMETER (  "Follow path of previous row?",
                          "The search is reduced according to the "
                          "result of the previous row.",
                          m_followPathPrevRow_b,
                          this,
                          FollowPathPrevRow,
                          CKmStereoOp );
        
    ADD_UINT_PARAMETER ( "Follow Path Tolerance",
                         "The nodes to consider following the path "
                         "are those within a maximal distance of the "
                         "follow path. [px]",
                         m_pathTolerance_ui,
                         this,
                         PathTolerance,
                         CKmStereoOp );

    ADD_UINT_PARAMETER ( "Delta Disparity",
                         "Disparity to add to the range for bounded versions.",
                         m_deltaDisp_ui,
                         this,
                         DeltaDisp,
                         CKmStereoOp );

    END_PARAMETER_GROUP;

    // ---------------------------------------------------------------------
    // Display options: one entry per registered drawing list plus the
    // parameters controlling the residuum, DSI slice and 3D output.
    // ---------------------------------------------------------------------
    BEGIN_PARAMETER_GROUP("Display", false, CColor::red );

    addDrawingListParameter ( "KM Stereo Disparity Image" );
    addDrawingListParameter ( "Stereo Bird View" );

    addDrawingListParameter ( "WTA Raw" );
    addDrawingListParameter ( "WTA w/ Bounds" );

    addDrawingListParameter ( "Dynamic Programming" );
    addDrawingListParameter ( "Dynamic Programming with Pred" );
    addDrawingListParameter ( "Vertical Dynamic Programming" );
    addDrawingListParameter ( "Dynamic Programming w/ Bounds" );

    addDrawingListParameter ( "Pyramidal Dynamic Programming" );
    addDrawingListParameter ( "Pyramidal Dynamic Programming w/ Bounds" );

    addDrawingListParameter ( "Residuum" );

    ADD_FLOAT_PARAMETER ( "Residuum Norm",
                          "Norm to use for displaying the residuum image.",
                          m_displayResiduumNorm_f,
                          this,
                          DisplayResiduumNorm,
                          CKmStereoOp );



    ADD_LINE_SEPARATOR;

    addDrawingListParameter ( "DSI Slice" );

    addDrawingListParameter ( "DSI Slice Row Overlay");

    ADD_BOOL_PARAMETER ( "Show optimized DSI",
                         "If true, show optimized DSI. If false shows standard DSI.",
                         m_showOptDsi_b,
                         this,
                         ShowOptDsi,
                         CKmStereoOp );

    ADD_DOUBLE_PARAMETER ( "DSI Slice scale factor",
                           "Scale factor for normalizing the values to display.",
                           m_dsiSliceScale_d,
                           this,
                           DsiSliceScale,
                           CKmStereoOp );

    ADD_DOUBLE_PARAMETER ( "DSI Slice offset",
                           "Offset for normalizing the values to display.",
                           m_dsiSliceOffset_d,
                           this,
                           DsiSliceOffset,
                           CKmStereoOp );


    ADD_LINE_SEPARATOR;

    ADD_BOOL_PARAMETER ( "Show 3D Points",
                         "Show points in 3D display.",
                         m_show3DPoints_b,
                         this,
                         Show3DPoints,
                         CKmStereoOp );

    ADD_BOOL_PARAMETER ( "Show 3D Mesh",
                         "Show mesh in 3D display.",
                         m_show3DMesh_b,
                         this,
                         Show3DMesh,
                         CKmStereoOp );

    ADD_FLOAT_PARAMETER ( "Max Depth Distance 4 Mesh",
                          "Max distance between vertices to show a triangle.",
                          m_maxDist4Mesh_f,
                          this,
                          MaxDist4Mesh,
                          CKmStereoOp );

    ADD_FLOAT_PARAMETER ( "Max Disp Distance 4 Mesh",
                          "Max disparity distance between vertices to show a triangle.",
                          m_maxDisp4Mesh_f,
                          this,
                          MaxDisp4Mesh,
                          CKmStereoOp );

    END_PARAMETER_GROUP;

    //m_paramSet_p -> addSubset ( m_stereoOp.getParameterSet() );
    
}

/// Virtual destructor.
/// The body is intentionally empty: no explicit cleanup is performed here.
CKmStereoOp::~CKmStereoOp ()
{
}

/// Cycle event.
bool CKmStereoOp::cycle()
{
    struct timeval tv1, tv2;
    
    //gettimeofday(&tv1, NULL);

    //calculateKmStereo( );
    
    //gettimeofday(&tv2, NULL);

    //printf("STEREO TIME %lf milliseconds\n",
    //       (((double)tv2.tv_usec) - ((double)tv1.tv_usec))/1000.);
    
    //m_leftRes.clear();
    
    //registerOutput ( "Disparity Image", &m_leftRes  );    
    
    gettimeofday(&tv1, NULL);
    {
#if 1
        /// Get input from parent.
        CUShortImage * imgL_p = dynamic_cast<CUShortImage *>(getInput ( "Rectified Left Image" ) );
        CUShortImage * imgR_p = dynamic_cast<CUShortImage *>(getInput ( "Rectified Right Image" ) );
        
        typedef CDispSpaceImageOp<unsigned short int, short int> MyCDispSpaceImageOp;
        
        static MyCDispSpaceImageOp dsiOp  ( MyCDispSpaceImageOp::CT_DUV );
        static MyCDispSpaceImageOp dsiOp3 ( MyCDispSpaceImageOp::CT_UVD );
        
        for (int maxd = 10; 0 && maxd < 150; ++maxd)
        {
            dsiOp.setImageSizes ( imgL_p -> getWidth(), 
                                  imgL_p -> getHeight(),
                                  0, maxd );
            
            dsiOp3.setImageSizes ( imgL_p -> getWidth(), 
                                   imgL_p -> getHeight(),
                                   0, maxd );
            double time1_d = 0, time2_d = 0;
            double time1s_d = 0, time2s_d = 0;
            double minTime1_d, minTime2_d ;
            double time_d;
            
            int maxIters_i = 50;
            
            for (int i=0; i < maxIters_i; ++i)
            {
                gettimeofday(&tv1, NULL);
                dsiOp.compute (* imgL_p , * imgR_p );
                gettimeofday(&tv2, NULL);

                time_d = fabs((((double)tv2.tv_usec) - ((double)tv1.tv_usec))/1000.);
                time1_d += time_d;
                time1s_d += time_d * time_d;

                if (time_d < minTime1_d || i==0)
                    minTime1_d = time_d;
                
                gettimeofday(&tv1, NULL);
                dsiOp3.compute (* imgL_p , * imgR_p );
                gettimeofday(&tv2, NULL);

                time_d = fabs((((double)tv2.tv_usec) - ((double)tv1.tv_usec))/1000.);
                time2_d += time_d;
                time2s_d += time_d * time_d;
                
                if (time_d < minTime2_d || i==0)
                    minTime2_d = time_d;

            }
            
            printf("D: %i DUV %lf (%lf, min %lf) UVD %lf (%lf, min %lf) milliseconds\n",
                   maxd+1, 
                   time1_d/maxIters_i, 
                   sqrt(time1s_d / maxIters_i - (time1_d/maxIters_i)*(time1_d/maxIters_i)),
                   minTime1_d,
                   time2_d/1000,
                   sqrt(time2s_d / maxIters_i - (time2_d/maxIters_i)*(time2_d/maxIters_i)),
                   minTime2_d );
        }
#endif // 0
      
        //////////////////////////////////////////////////////////////////////

        double start_d = 0;
        double end_d = 0;
        
        //CUShortImage * imgL_p       = dynamic_cast<CUShortImage *>(getInput ( "Rectified Left Image" ) );
        //CUShortImage * imgR_p;
        
        if ( !m_useRRI_b)
            imgR_p = dynamic_cast<CUShortImage *>(getInput ( "Rectified Right Image" ) );
        else
            imgR_p       = dynamic_cast<CUShortImage *>(getInput ( "Remapped Rectified Right Image" ) );
    
        if (!imgL_p || !imgR_p) return false;

#if defined ( _OPENMP )
        start_d = omp_get_wtime();
#endif

        if ( m_zssdKernelWidth_ui  == 1 &&
             m_zssdKernelHeight_ui == 1 )
            m_dsi.compute (* imgL_p , * imgR_p );
        else        
            m_dsi.computeZSSD (* imgL_p , * imgR_p,
                               m_zssdKernelWidth_ui,
                               m_zssdKernelHeight_ui,
                               m_normalizeZssd_b );

#if defined ( _OPENMP )
        end_d = omp_get_wtime();
#endif
    
        printf("DSI Computation  %lf milliseconds\n",
               (end_d-start_d)*1000.);

#if defined ( _OPENMP )
        start_d = omp_get_wtime();
#endif

        remapDsi();

#if defined ( _OPENMP )
        end_d = omp_get_wtime();
#endif

    printf("DSI REMAPPING  %lf milliseconds\n",
           (end_d-start_d)*1000.);

        testDSI();

        

        //////////////////////////////////////////////////////////////////////


        CFloatImage * fLImg_p = dynamic_cast<CFloatImage *>(getInput ( "Rectified Float Left Image" ) );
        CFloatImage * fRImg_p = dynamic_cast<CFloatImage *>(getInput ( "Rectified Float Right Image" ) );

#if 0
        CGaussianPyramid<float> fPyramidOpLeft ( fLImg_p->getWidth(), 
                                                 fLImg_p->getHeight(),
                                                 6 );

        CGaussianPyramid<float> fPyramidOpRight ( fRImg_p->getWidth(), 
                                                  fRImg_p->getHeight(),
                                                  6 );

        CGaussianPyramid<unsigned short> pyramidOpLeft ( fLImg_p->getWidth(), 
                                                         fLImg_p->getHeight(),
                                                         6 );

        CGaussianPyramid<unsigned short> pyramidOpRight ( fRImg_p->getWidth(), 
                                                          fRImg_p->getHeight(),
                                                          6 );

        const int testLevel_i = 3;
        
        gettimeofday(&tv1, NULL);

        fPyramidOpLeft.compute  ( *fLImg_p );
        fPyramidOpRight.compute ( *fRImg_p );

        gettimeofday(&tv2, NULL);
        printf("pyramid construction: %f\n",
               fabsf(tv2.tv_usec - tv1.tv_usec)/1000.);

        typedef CDispSpaceImageOp<float, float> CTestFloatDSI;

        CTestFloatDSI myfdsi(CTestFloatDSI::CT_DUV );
        
        myfdsi.setImageSizes ( fPyramidOpLeft.getLevelImage(testLevel_i) -> getWidth(), 
                               fPyramidOpLeft.getLevelImage(testLevel_i) -> getHeight(),
                               -2, 
                               (m_maxDisparity_i / pow(2.,testLevel_i) ) + 2 );


        gettimeofday(&tv1, NULL);

        //myfdsi.compute (* fPyramidOpLeft.getLevelImage(testLevel_i), 
        //                * fPyramidOpRight.getLevelImage(testLevel_i) );

        //myfdsi.computeZSSD (* fPyramidOpLeft.getLevelImage(2), 
        //                    * fPyramidOpRight.getLevelImage(2),
        //                    3,
        //                    3,
        //                    m_normalizeZssd_b );

        gettimeofday(&tv2, NULL);
        printf("DSI 1x1 for level %i: %f\n", testLevel_i,
               fabsf(tv2.tv_usec - tv1.tv_usec)/1000.);

        gettimeofday(&tv1, NULL);

        myfdsi.computeZSSD (* fPyramidOpLeft.getLevelImage(2), 
                            * fPyramidOpRight.getLevelImage(2),
                            3,
                            3,
                            m_normalizeZssd_b );

        gettimeofday(&tv2, NULL);
        printf("DSI nxm for level %i: %f (%ix%ix%i)\n", testLevel_i,
               fabsf(tv2.tv_usec - tv1.tv_usec)/1000.,
               myfdsi.getDisparitySpaceImage().width_ui,
               myfdsi.getDisparitySpaceImage().height_ui,
               myfdsi.getDisparitySpaceImage().dispRange_ui );

#endif
      
        //////////////////////////////////////////////////////////////////////

        if (0)
        {
            m_stereoOp.compute ( *fLImg_p, *fRImg_p );
            
            CFloatImage *dispImg_p = m_stereoOp.getDisparityImage();
            
            CDrawingList *  list_p;
            list_p = getDrawingList ("Test PyrDynProgStereo");
            list_p -> clear();
            list_p -> addImage ( *dispImg_p, 0, 0, 800, 600, 1./(m_maxDisparity_i) );
        }
        
        //printf("%p\n", 

        //////////////////////////////////////////////////////////////////////

        //CDrawingList *  list_p = getDrawingList ("test pyramid level"); 
        //list_p -> clear();
        //list_p -> addImage ( *fPyramidOpLeft.getLevelImage(3),  
        //                     0, 0, 800, 600, 1./65535 );

        /*
          static CUShortImage temporal[600];
          static CUShortImage testDispImg ( m_leftRes );
          testDispImg.ensureAllocation();
         

          CDrawingList *  list_p; 
          char name[256];
          sprintf(name, "TEMPORAL"); 
          list_p = getDrawingList (name);
          list_p -> setVisibility(true);
          list_p -> setPosition(S2D<int>(1,2));
        
          list_p -> clear();
         
          for (int i = 0; i < 600; ++i)
          {
          temporal[i].setWidth(83);
          temporal[i].setHeight(800);
          temporal[i].setBytesPerPixel(2);
          temporal[i].setDataType( CUShortImage::IDT_SHORT );
          temporal[i].setImageFormat( CUShortImage::IF_LUMINANCE );
          temporal[i].setData( (uint8_t *) dsiOp.getDisparitySpaceImage().getColumnDispSlice ( i ) );
                
          //for (int j = 0; j < 800; ++j)
          //    *(( short int *)(temporal[i].getData() + j)) = (short int)((j-400.)/400. * 8192);

          list_p -> addImage ( temporal[i], 4*(i%200), (i/150)*150, 4, 150 );
                

          short int *resul#if 0
          t_p = (short int *) testDispImg.getData();

          for (int j = 1; j < 799; ++j)
          {
          short int *scores_p = (short int *) temporal[i].getData();
          scores_p += j * 83;

          unsigned int minScore_ui;
          int bestDisp_i = 0;
                    
          for (int d = -1; d <= std::min(81, j); ++d )
          {
          if (minScore_ui > (unsigned)abs(scores_p[d+1]) || d==-1)
          {
          minScore_ui = abs(scores_p[d+1]);
          bestDisp_i = d;
          }
                        
          }
          //printf("%i %i best disp is %i\n", i, j, bestDisp_i);
          result_p[i*800+j]=bestDisp_i;
          }
          }
 
          list_p = getDrawingList ("Test Disparity Image");
          list_p -> setVisibility(true);
          list_p -> setPosition(S2D<int>(2,2));
          list_p -> addImage ( testDispImg, 0, 0, 800, 600, 1./80. );
        */
    }

    //m_stereoOp.setDisparityRange ( S2D<int>( 0, 100 ) );

    /*
    C3DMatrix mat;
    mat.loadIdentity();
    
    mat.rotateX(0.014128);
    mat.rotateY(0);
    mat.rotateZ(0.051491);
    mat.print();
    */

    if ( m_computeResiduum_b )
        computeResiduum ( m_dispImgDPwithBounds );

    return COperator::cycle();
}

/// Computes the per-pixel residuum between a disparity image and the
/// "Predicted Disparity Image" input and stores it in m_dispImgResiduum.
///
/// A pixel receives (measured - predicted) when both disparities are
/// strictly positive and 0 otherwise. Nothing is computed when the
/// predicted image is not available as a CFloatImage or when its size
/// differs from the size of f_dispImg.
///
/// \param f_dispImg  Measured disparity image.
void 
CKmStereoOp::computeResiduum ( const CFloatImage & f_dispImg )
{
    CFloatImage * predDispImg_p = dynamic_cast<CFloatImage *>(getInput ( "Predicted Disparity Image") );

    if (!predDispImg_p) return;

    /// All three buffers are traversed linearly with the pixel count
    /// of f_dispImg, so a prediction of a different size would be read
    /// out of bounds (or with a wrong row stride).
    if ( predDispImg_p -> getSize() != f_dispImg.getSize() ) return;

    /// (Re)allocate the output only when the image size has changed.
    if ( m_dispImgResiduum.getSize() != f_dispImg.getSize() )
    {
        m_dispImgResiduum.freeMemory();
        m_dispImgResiduum.setSize(f_dispImg.getSize());
        m_dispImgResiduum.ensureAllocation();
    }

    const float *res_p  = f_dispImg.getData();
    const float *pred_p = predDispImg_p -> getData();
    float       *dst_p  = m_dispImgResiduum.getData();
    const float * const end_p = dst_p + f_dispImg.getSize().width * f_dispImg.getSize().height;

    for ( ; dst_p < end_p; ++res_p, ++pred_p, ++dst_p )
    {
        /// Non-positive values are treated as "no disparity available".
        if (*res_p > 0 && *pred_p > 0)
            *dst_p = *res_p - *pred_p;
        else
            *dst_p = 0;
    }
}


/// Runs every stereo algorithm whose enable flag is set and prints the
/// time spent in each of them.
///
/// Time stamps are only taken when compiled with OpenMP; without it
/// both stamps stay at 0 and the printed duration is 0.
/// The output "Disparity Image" is registered to m_dispImgPyrDP when
/// the pyramidal DP is enabled and to NULL otherwise.
void CKmStereoOp::testDSI()
{
    double tic_d = 0;
    double toc_d = 0;

    /// Winner-takes-all.
    if ( m_computeWTA_b )
    {
#if defined ( _OPENMP )
        tic_d = omp_get_wtime();
#endif
        computeWTA();
#if defined ( _OPENMP )
        toc_d = omp_get_wtime();
#endif
        printf("WTA %f milliseconds\n", (toc_d - tic_d)*1000.);
    }

    /// Winner-takes-all with bounds.
    if ( m_computeWTAwithBounds_b )
    {
#if defined ( _OPENMP )
        tic_d = omp_get_wtime();
#endif
        computeWTAwithBounds();
#if defined ( _OPENMP )
        toc_d = omp_get_wtime();
#endif
        printf("WTAWBounds %f milliseconds\n", (toc_d - tic_d)*1000.);
    }

    /// Dynamic programming.
    if ( m_computeDP_b )
    {
#if defined ( _OPENMP )
        tic_d = omp_get_wtime();
#endif
        computeDP();
#if defined ( _OPENMP )
        toc_d = omp_get_wtime();
#endif
        printf("DP %f milliseconds\n", (toc_d - tic_d)*1000.);
    }

    /// Dynamic programming with prediction.
    if ( m_computeDPwithPred_b )
    {
#if defined ( _OPENMP )
        tic_d = omp_get_wtime();
#endif
        computeDPWithPrediction();
#if defined ( _OPENMP )
        toc_d = omp_get_wtime();
#endif
        printf("DP with Pred %f milliseconds\n", (toc_d - tic_d)*1000.);
    }

    /// Vertical dynamic programming.
    if ( m_computeVDP_b )
    {
#if defined ( _OPENMP )
        tic_d = omp_get_wtime();
#endif
        computeVerticalDP();
#if defined ( _OPENMP )
        toc_d = omp_get_wtime();
#endif
        printf("VDP %f milliseconds\n", (toc_d - tic_d)*1000.);
    }

    /// Dynamic programming with bounds: one pass on m_dsi, one pass on
    /// m_rightDsi and a final consistency check between both results.
    if ( m_computeDPwithBounds_b )
    {
#if defined ( _OPENMP )
        tic_d = omp_get_wtime();
#endif
        computeDPwithBounds( m_dispImgDPwithBounds,
                             m_dsi,
                             "Predicted Min Disp Image",
                             "Predicted Max Disp Image",
                             "Predicted Disparity Image",
                             "First Order Disp U" );

        computeDPwithBounds( m_dispImgVDP,
                             m_rightDsi,
                             "Predicted Min Right Disp Image",
                             "Predicted Max Right Disp Image",
                             "Predicted Right Disparity Image",
                             "NULL" );

        doLR_RL_Check(m_dispImgDPwithBounds, m_dispImgVDP);
#if defined ( _OPENMP )
        toc_d = omp_get_wtime();
#endif
        printf("DPWBounds %f milliseconds\n", (toc_d - tic_d)*1000.);
    }

    /// Pyramidal dynamic programming; also decides what is published
    /// as "Disparity Image".
    if ( !m_computePyrDP_b )
    {
        registerOutput ( "Disparity Image", NULL  );
    }
    else
    {
#if defined ( _OPENMP )
        tic_d = omp_get_wtime();
#endif
        computePyrDP();
#if defined ( _OPENMP )
        toc_d = omp_get_wtime();
#endif
        printf("PyrDP %f milliseconds\n", (toc_d - tic_d)*1000.);
        registerOutput ( "Disparity Image", &m_dispImgPyrDP  );
    }

    /// Pyramidal dynamic programming with bounds.
    if ( m_computePyrDPwithBounds_b )
    {
#if defined ( _OPENMP )
        tic_d = omp_get_wtime();
#endif
        computePyrDPwithBounds();
#if defined ( _OPENMP )
        toc_d = omp_get_wtime();
#endif
        printf("PyrDPWBounds %f milliseconds\n", (toc_d - tic_d)*1000.);
    }
}

void
CKmStereoOp::computeDP()
{
    const int w_i      = m_dispImgDP.getWidth();
    const int h_i      = m_dispImgDP.getHeight();

    CDsiOpFloat_t::SDispSpaceImage dsi = m_dsi.getDisparitySpaceImage( );    

    S2D<int> halfMaskSize ( m_zssdKernelWidth_ui / 2 , m_zssdKernelHeight_ui / 2);

    CFloatImage * predDispU_p   = dynamic_cast<CFloatImage *>(getInput ( "First Order Disp U" ) );

    //#undef _OPENMP
#if defined ( _OPENMP )
    CDynamicProgrammingOp dynProgOpVectors[m_numThreads_ci];
    std::vector<int> resVectors[m_numThreads_ci];
    bool initializedVector[m_numThreads_ci];

    for (int i = 0; i < m_numThreads_ci; ++i) 
    {
        dynProgOpVectors[i].setCostImageSize( dsi.dispRange_ui, dsi.width_ui );

        dynProgOpVectors[i].setExpectedGradient    ( NULL );

        dynProgOpVectors[i].setDistanceCost        ( m_distCost_f );
        dynProgOpVectors[i].setDistanceTh          ( m_distThreshold_f );
        
        dynProgOpVectors[i].setPredictionCost      ( m_predCost4Prop_f );
        dynProgOpVectors[i].setPredictionTh        ( m_predThreshold4Prop_f );
        
        dynProgOpVectors[i].setInitialCost         ( m_initialCost_f );
        dynProgOpVectors[i].setMinCostValue        ( m_minCostValue_f );
        dynProgOpVectors[i].setMaxCostValue        ( m_maxCostValue_f );
        
        dynProgOpVectors[i].setApplyMedianFilter   ( m_applyMedian_b );
        dynProgOpVectors[i].setMedFiltKernelSize   ( m_mfKernelSize_ui );
        
        dynProgOpVectors[i].setFollowPathTolerance ( m_pathTolerance_ui );
        
        resVectors[i].resize( dsi.width_ui, 0 );
        initializedVector[i] = false;    
    }
#pragma omp parallel for num_threads(m_numThreads_ci) schedule(static)
#else
    static CDynamicProgrammingOp dynProgOp;

    dynProgOp.setCostImageSize( dsi.dispRange_ui, dsi.width_ui );

    dynProgOp.setExpectedGradient    ( NULL );

    dynProgOp.setDistanceCost        ( m_distCost_f );
    dynProgOp.setDistanceTh          ( m_distThreshold_f );

    dynProgOp.setPredictionCost      ( m_predCost4Prop_f );
    dynProgOp.setPredictionTh        ( m_predThreshold4Prop_f );

    dynProgOp.setInitialCost         ( m_initialCost_f );
    dynProgOp.setMinCostValue        ( m_minCostValue_f );
    dynProgOp.setMaxCostValue        ( m_maxCostValue_f );

    dynProgOp.setApplyMedianFilter   ( m_applyMedian_b );
    dynProgOp.setMedFiltKernelSize   ( m_mfKernelSize_ui );

    dynProgOp.setFollowPathTolerance ( m_pathTolerance_ui );

    std::vector<int> resVec( dsi.width_ui, 0 );
    bool initialized_b = false;
#endif
    for (int i = halfMaskSize.y*2; i < h_i-halfMaskSize.y*2; ++i)
    {
#if defined ( _OPENMP )
        const unsigned int threadNum_ui = omp_get_thread_num();
        std::vector<int> &resVec = resVectors[threadNum_ui];
        bool &initialized_b = initializedVector[threadNum_ui];
        CDynamicProgrammingOp &dynProgOp = dynProgOpVectors[threadNum_ui];
#endif        

        if (predDispU_p && m_useGradientPred_b)
        {
            dynProgOp.setExpectedGradient    ( predDispU_p->getScanline(i) );
        }
        
        if ( m_predPathPrevRow_b )
        {
            dynProgOp.setPredictionCost      ( m_predCost4Prop_f );
            dynProgOp.setPredictionTh        ( m_predThreshold4Prop_f );
        }
        else
        {
            dynProgOp.setPredictionCost      ( 0 );
            dynProgOp.setPredictionTh        ( 0 );
        }

        CFloatImage img;
        img.setWidth  ( dsi.dispRange_ui );
        img.setHeight ( dsi.width_ui );
        img.setData   ( dsi.getDispColumnSlice(i) );
        
        if ( m_followPathPrevRow_b && initialized_b )
            dynProgOp.compute ( img, resVec, resVec );
        else
            dynProgOp.compute ( img, resVec );

        float *  result_p = m_dispImgDP.getScanline(i);
        
        for ( int j = 0 ; j < w_i ; ++j, ++result_p )
        {
            *result_p = resVec[j] + dsi.minDisp_i;

            if ( *result_p < 0 ||
                 j - *result_p < 0 ) *result_p = 0;
        }

        if (m_updateOptDsi_b)
        {  
            CDynamicProgrammingOp::CNodeImage graphImg = dynProgOp.getGraphImage();
            updateOptDsi ( i, graphImg );
        }
        
        initialized_b = true;
    }

    if ( m_refineDispMap_b )
    {
        refineDispMap ( m_dispImgDP, m_dsi );
    }

    postProcess ( m_dispImgDP );
}


void
CKmStereoOp::computeDPWithPrediction()
{
    const int w_i      = m_dispImgDPwithPred.getWidth();
    const int h_i      = m_dispImgDPwithPred.getHeight();

    CDsiOpFloat_t::SDispSpaceImage dsi = m_dsi.getDisparitySpaceImage( );    

    S2D<int> halfMaskSize ( m_zssdKernelWidth_ui / 2 , m_zssdKernelHeight_ui / 2);

    CFloatImage * predDispImg_p = dynamic_cast<CFloatImage *>(getInput ( "Predicted Disparity Image") );
    CFloatImage * predDispU_p   = dynamic_cast<CFloatImage *>(getInput ( "First Order Disp U" ) );

    //#undef _OPENMP
#if defined ( _OPENMP )
    CDynamicProgrammingOp dynProgOpVectors[m_numThreads_ci];
    std::vector<int> resVectors[m_numThreads_ci];
    bool initializedVector[m_numThreads_ci];

    for (int i = 0; i < m_numThreads_ci; ++i) 
    {
        dynProgOpVectors[i].setCostImageSize( dsi.dispRange_ui, dsi.width_ui );

        dynProgOpVectors[i].setExpectedGradient    ( NULL );

        dynProgOpVectors[i].setDistanceCost        ( m_distCost_f );
        dynProgOpVectors[i].setDistanceTh          ( m_distThreshold_f );
        
        dynProgOpVectors[i].setPredictionCost      ( m_predCost_f );
        dynProgOpVectors[i].setPredictionTh        ( m_predThreshold_f );
        
        dynProgOpVectors[i].setInitialCost         ( m_initialCost_f );
        dynProgOpVectors[i].setMinCostValue        ( m_minCostValue_f );
        dynProgOpVectors[i].setMaxCostValue        ( m_maxCostValue_f );
        
        dynProgOpVectors[i].setApplyMedianFilter   ( m_applyMedian_b );
        dynProgOpVectors[i].setMedFiltKernelSize   ( m_mfKernelSize_ui );
        
        dynProgOpVectors[i].setFollowPathTolerance ( m_pathTolerance_ui );
        
        resVectors[i].resize( dsi.width_ui, 0 );
        initializedVector[i] = false;    
    }
#pragma omp parallel for num_threads(m_numThreads_ci) schedule(static)
#else
    static CDynamicProgrammingOp dynProgOp;

    dynProgOp.setCostImageSize( dsi.dispRange_ui, dsi.width_ui );

    dynProgOp.setExpectedGradient    ( NULL );

    dynProgOp.setDistanceCost        ( m_distCost_f );
    dynProgOp.setDistanceTh          ( m_distThreshold_f );

    dynProgOp.setPredictionCost      ( m_predCost_f );
    dynProgOp.setPredictionTh        ( m_predThreshold_f );

    dynProgOp.setInitialCost         ( m_initialCost_f );
    dynProgOp.setMinCostValue        ( m_minCostValue_f );
    dynProgOp.setMaxCostValue        ( m_maxCostValue_f );

    dynProgOp.setApplyMedianFilter   ( m_applyMedian_b );
    dynProgOp.setMedFiltKernelSize   ( m_mfKernelSize_ui );

    dynProgOp.setFollowPathTolerance ( m_pathTolerance_ui );

    std::vector<int> resVec( dsi.width_ui, 0 );
    bool initialized_b = false;
#endif
    for (int i = halfMaskSize.y*2; i < h_i-halfMaskSize.y*2; ++i)
    {
#if defined ( _OPENMP )
        const unsigned int threadNum_ui = omp_get_thread_num();
        std::vector<int> &resVec = resVectors[threadNum_ui];
        bool &initialized_b = initializedVector[threadNum_ui];
        CDynamicProgrammingOp &dynProgOp = dynProgOpVectors[threadNum_ui];
#endif
        
        if (predDispU_p && m_useGradientPred_b)
            dynProgOp.setExpectedGradient    ( predDispU_p->getScanline(i) );

        if ( predDispImg_p )
        {
            float *pred_p = predDispImg_p -> getScanline ( i );
            for (int j = 0; j < w_i; ++j, ++pred_p)
            {
                if ( !*pred_p )
                    resVec[j] = -1;
                else
                    resVec[j] = (int)(*pred_p) - dsi.minDisp_i;

                //printf("Assigning a prediction of %i to (%i %i)\n", resVec[j], i, j); 
            }
        }

        CFloatImage img;
        img.setWidth  ( dsi.dispRange_ui );
        img.setHeight ( dsi.width_ui );
        img.setData   ( dsi.getDispColumnSlice(i) );
        
        dynProgOp.compute ( img, resVec );

        float *  result_p = m_dispImgDPwithPred.getScanline(i);
        
        for ( int j = 0 ; j < w_i ; ++j, ++result_p )
        {
            *result_p = resVec[j] + dsi.minDisp_i;

            if ( *result_p < 0 ||
                 j - *result_p < 0 ) *result_p = 0;
        }


        if (m_updateOptDsi_b)
        {  
            CDynamicProgrammingOp::CNodeImage graphImg = dynProgOp.getGraphImage();
            updateOptDsi ( i, graphImg );
        }

        initialized_b = true;
    }

    if ( m_refineDispMap_b )
    {
        refineDispMap ( m_dispImgDPwithPred, m_dsi );
    }

    postProcess ( m_dispImgDPwithPred );
}
/// Column-wise (vertical) dynamic programming on m_dsi; the result is
/// written to m_dispImgVDP.
///
/// For every processed column a cost image (width = disparity range,
/// height = image height) is assembled from the DSI and handed to a
/// CDynamicProgrammingOp. The returned path, offset by the minimum
/// disparity of the DSI, is stored as the disparity of the column.
/// With OpenMP each thread works with its own solver, path vector and
/// "previous column available" flag.
void
CKmStereoOp::computeVerticalDP()
{
    const int w_i      = m_dispImgVDP.getWidth();
    const int h_i      = m_dispImgVDP.getHeight();

    CDsiOpFloat_t::SDispSpaceImage dsi = m_dsi.getDisparitySpaceImage( );    

    S2D<int> halfMaskSize ( m_zssdKernelWidth_ui / 2 , m_zssdKernelHeight_ui / 2);

    CFloatImage * predDispV_p   = dynamic_cast<CFloatImage *>(getInput ( "First Order Disp V" ) );

    //#undef _OPENMP
#if defined ( _OPENMP )
    /// One solver, one path vector and one flag per thread.
    CDynamicProgrammingOp dynProgOpVectors[m_numThreads_ci];
    std::vector<int> resVectors[m_numThreads_ci];
    bool initializedVector[m_numThreads_ci];

    for (int i = 0; i < m_numThreads_ci; ++i) 
    {
        dynProgOpVectors[i].setCostImageSize( dsi.dispRange_ui, dsi.height_ui );

        dynProgOpVectors[i].setExpectedGradient    ( NULL );

        dynProgOpVectors[i].setDistanceCost        ( m_distCost_f );
        dynProgOpVectors[i].setDistanceTh          ( m_distThreshold_f );
        
        dynProgOpVectors[i].setPredictionCost      ( m_predCost4Prop_f );
        dynProgOpVectors[i].setPredictionTh        ( m_predThreshold4Prop_f );
        
        dynProgOpVectors[i].setInitialCost         ( m_initialCost_f );
        dynProgOpVectors[i].setMinCostValue        ( m_minCostValue_f );
        dynProgOpVectors[i].setMaxCostValue        ( m_maxCostValue_f );
        
        dynProgOpVectors[i].setApplyMedianFilter   ( m_applyMedian_b );
        dynProgOpVectors[i].setMedFiltKernelSize   ( m_mfKernelSize_ui );
        
        dynProgOpVectors[i].setFollowPathTolerance ( m_pathTolerance_ui );
        
        resVectors[i].resize( dsi.height_ui, 0 );
        initializedVector[i] = false;    
    }
#pragma omp parallel for num_threads(m_numThreads_ci) schedule(static)
#else
    /// Single solver reused across calls.
    static CDynamicProgrammingOp dynProgOp;

    dynProgOp.setCostImageSize( dsi.dispRange_ui, dsi.height_ui );

    dynProgOp.setExpectedGradient    ( NULL );

    dynProgOp.setDistanceCost        ( m_distCost_f );
    dynProgOp.setDistanceTh          ( m_distThreshold_f );

    dynProgOp.setPredictionCost      ( m_predCost4Prop_f );
    dynProgOp.setPredictionTh        ( m_predThreshold4Prop_f );

    dynProgOp.setInitialCost         ( m_initialCost_f );
    dynProgOp.setMinCostValue        ( m_minCostValue_f );
    dynProgOp.setMaxCostValue        ( m_maxCostValue_f );

    dynProgOp.setApplyMedianFilter   ( m_applyMedian_b );
    dynProgOp.setMedFiltKernelSize   ( m_mfKernelSize_ui );

    dynProgOp.setFollowPathTolerance ( m_pathTolerance_ui );

    std::vector<int> resVec( dsi.height_ui, 0 );
    bool initialized_b = false;
#endif
    /// The loop runs over columns, so the skipped border on both the
    /// left and the right side depends on the kernel width
    /// (halfMaskSize.x), not on the kernel height.
    for (int j = halfMaskSize.x*2; j < w_i-halfMaskSize.x*2; ++j)
    {
#if defined ( _OPENMP )
        const unsigned int threadNum_ui = omp_get_thread_num();
        std::vector<int> &resVec = resVectors[threadNum_ui];
        bool &initialized_b = initializedVector[threadNum_ui];
        CDynamicProgrammingOp &dynProgOp = dynProgOpVectors[threadNum_ui];
#endif

        /// Expected gradient of the solution along this column. The
        /// buffer lives until the end of the iteration and is released
        /// automatically.
        std::vector<float> gradV;

        if (predDispV_p && m_useGradientPred_b)
        {
            gradV.resize ( dsi.height_ui );

            for (int i = 0 ; i < (int)dsi.height_ui; ++i)
            {
                const float predV_f = predDispV_p->getScanline(i)[j];

                // NOTE(review): both branches evaluate to
                // -round(|predV_f|), i.e. the sign of a negative input
                // is lost. Kept as is; confirm whether
                // -(int)(predV_f - .5) was intended for negative values.
                if (predV_f > 0)
                    gradV[i] = -(int)(predV_f + .5);
                else
                    gradV[i] = -(int)(-predV_f + .5);
            }
        }

        float * gradV_p = NULL;
        if ( !gradV.empty() )
            gradV_p = &gradV[0];

        dynProgOp.setExpectedGradient ( gradV_p );

        /// Prediction terms are only active when the previously
        /// processed path is to be used as prediction.
        if ( m_predPathPrevRow_b )
        {
            dynProgOp.setPredictionCost      ( m_predCost4Prop_f );
            dynProgOp.setPredictionTh        ( m_predThreshold4Prop_f );
        }
        else
        {
            dynProgOp.setPredictionCost      ( 0 );
            dynProgOp.setPredictionTh        ( 0 );
        }

        /// Assemble the cost image of this column: one row per image
        /// row, one entry per disparity.
        CFloatImage img ( dsi.dispRange_ui, dsi.height_ui );
        img.ensureAllocation();

        for (int i = 0 ; i < (int)dsi.height_ui; ++i)
        {
            for (int d = 0; d < (int)dsi.dispRange_ui; ++d)
                img[i][d] = *dsi.getPtrAtRowColumnDisp ( i, j, d + dsi.minDisp_i );
        }

        if ( m_followPathPrevRow_b && initialized_b )
            dynProgOp.compute ( img, resVec, resVec );
        else
            dynProgOp.compute ( img, resVec );

        /// Walk down column j of the output image (row stride w_i).
        /// Negative values and disparities larger than the column
        /// index are set to 0.
        float *  result_p = m_dispImgVDP[0] + j;
        
        for ( int i = 0; i < h_i ; ++i, result_p+=w_i )
        {
            *result_p = resVec[i] + dsi.minDisp_i;

            if ( *result_p < 0 ||
                 j - *result_p < 0 ) *result_p = 0;
        }

        initialized_b = true;

        /*
          if (m_updateOptDsi_b)
          {  
          CDynamicProgrammingOp::CNodeImage graphImg = dynProgOp.getGraphImage();
          updateOptDsi ( j, graphImg );
          }
        */

    }

    if ( m_refineDispMap_b )
    {
        refineDispMap ( m_dispImgVDP, m_dsi );
    }
    
    postProcess ( m_dispImgVDP );
}


void
CKmStereoOp::computePyrDP()
{
    const int w_i      = m_dispImgPyrDP.getWidth();
    const int h_i      = m_dispImgPyrDP.getHeight();

    CDsiOpFloat_t::SDispSpaceImage dsi = m_dsi.getDisparitySpaceImage( );    

    S2D<int> halfMaskSize ( m_zssdKernelWidth_ui / 2 , m_zssdKernelHeight_ui / 2);

    CFloatImage * predDispU_p   = dynamic_cast<CFloatImage *>(getInput ( "First Order Disp U" ) );

    //#undef _OPENMP
#if defined ( _OPENMP )
    CPyramidalDynProg     dynProgOpVectors[m_numThreads_ci];
    std::vector<int> resVectors[m_numThreads_ci];
    bool initializedVector[m_numThreads_ci];

    for (int i = 0; i < m_numThreads_ci; ++i) 
    {
        dynProgOpVectors[i].setPyramidParams( dsi.dispRange_ui, dsi.width_ui, m_maxLevel_ui );
            
        dynProgOpVectors[i].setExpectedGradient    ( NULL );

        dynProgOpVectors[i].setDistanceCost        ( m_distCost_f );
        dynProgOpVectors[i].setDistanceTh          ( m_distThreshold_f );
        
        dynProgOpVectors[i].setPredictionCost      ( 0 );
        dynProgOpVectors[i].setPredictionTh        ( 0 );
        
        dynProgOpVectors[i].setInitialCost         ( m_initialCost_f );
        dynProgOpVectors[i].setMinCostValue        ( m_minCostValue_f );
        dynProgOpVectors[i].setMaxCostValue        ( m_maxCostValue_f );
        
        dynProgOpVectors[i].setApplyMedianFilter   ( m_applyMedian_b );
        dynProgOpVectors[i].setMedFiltKernelSize   ( m_mfKernelSize_ui );
        
        dynProgOpVectors[i].setFollowPathTolerance ( m_pathTolerance_ui );
        
        resVectors[i].resize( dsi.width_ui, 0 );
        initializedVector[i] = false;    
    }
#pragma omp parallel for num_threads(m_numThreads_ci) schedule(static)
#else
    static CPyramidalDynProg       dynProgOp;

    dynProgOp.setPyramidParams( dsi.dispRange_ui, dsi.width_ui, m_maxLevel_ui );

    dynProgOp.setExpectedGradient    ( NULL );

    dynProgOp.setDistanceCost        ( m_distCost_f );
    dynProgOp.setDistanceTh          ( m_distThreshold_f );

    dynProgOp.setPredictionCost      ( 0 );
    dynProgOp.setPredictionTh        ( 0 );

    dynProgOp.setInitialCost         ( m_initialCost_f );
    dynProgOp.setMinCostValue        ( m_minCostValue_f );
    dynProgOp.setMaxCostValue        ( m_maxCostValue_f );

    dynProgOp.setApplyMedianFilter   ( m_applyMedian_b );
    dynProgOp.setMedFiltKernelSize   ( m_mfKernelSize_ui );

    dynProgOp.setFollowPathTolerance ( m_pathTolerance_ui );

    std::vector<int> resVec( dsi.width_ui, 0 );
    bool initialized_b = false;
#endif
    for (int i = halfMaskSize.y*2; i < h_i-halfMaskSize.y*2; ++i)
    {
#if defined ( _OPENMP )
        const unsigned int threadNum_ui = omp_get_thread_num();
        std::vector<int> &resVec = resVectors[threadNum_ui];
        bool &initialized_b = initializedVector[threadNum_ui];
        CPyramidalDynProg  &dynProgOp = dynProgOpVectors[threadNum_ui];
#endif
        
        if (predDispU_p && m_useGradientPred_b)
            dynProgOp.setExpectedGradient    ( predDispU_p->getScanline(i) );
        
        /// Dynamic programming comes here.
        CFloatImage img;
        img.setWidth  ( dsi.dispRange_ui );
        img.setHeight ( dsi.width_ui );
        img.setData   ( dsi.getDispColumnSlice(i) );
        
        dynProgOp.compute ( img, resVec );

        float *  result_p = m_dispImgPyrDP.getScanline(i);
        
        for ( int j = 0 ; j < w_i ; ++j, ++result_p )
        {
            *result_p = resVec[j] + dsi.minDisp_i;

            if ( *result_p < 0 ||
                 j - *result_p < 0 ) *result_p = 0;
        }

        initialized_b = true;

        if (m_updateOptDsi_b)
        {  
            CDynamicProgrammingOp::CNodeImage graphImg = dynProgOp.getDynProgObj(0)->getGraphImage();
            updateOptDsi ( i, graphImg );
        }
    }

    if ( m_refineDispMap_b )
    {
        refineDispMap ( m_dispImgPyrDP, m_dsi );
    }

    postProcess ( m_dispImgPyrDP );
}

/// Row-wise pyramidal dynamic programming on m_dsi constrained by
/// per-pixel disparity bounds; the result is written to
/// m_dispImgPyrDPwithBounds.
///
/// Inputs read by name:
///  - "Predicted Min Disp Image" / "Predicted Max Disp Image": per-pixel
///    bounds. The function returns without doing anything when either
///    of them is not available as a CFloatImage.
///  - "Predicted Disparity Image" (optional): per-pixel prediction.
///  - "First Order Disp U" (optional): expected gradient, used only
///    when m_useGradientPred_b is set.
///  - "Rectification Scale Factor" (default 1): scales the default
///    maximum disparity.
///
/// For every pixel a prediction (or -1 for "none") and a path
/// tolerance derived from the bounds are passed to the solver. With
/// OpenMP each thread works with its own solver and vectors.
void
CKmStereoOp::computePyrDPwithBounds()
{
    const int w_i      = m_dispImgPyrDPwithBounds.getWidth();
    const int h_i      = m_dispImgPyrDPwithBounds.getHeight();
    
    CDsiOpFloat_t::SDispSpaceImage dsi = m_dsi.getDisparitySpaceImage( );    

    CFloatImage * minDispImg_p = dynamic_cast<CFloatImage *>(getInput ( "Predicted Min Disp Image") );
    CFloatImage * maxDispImg_p = dynamic_cast<CFloatImage *>(getInput ( "Predicted Max Disp Image") );
    
    CFloatImage * predDispImg_p = dynamic_cast<CFloatImage *>(getInput ( "Predicted Disparity Image") );

    CFloatImage * predDispU_p   = dynamic_cast<CFloatImage *>(getInput ( "First Order Disp U" ) );

    if ( !minDispImg_p || !maxDispImg_p )
        return;
    
    float scaleFactor_f = getCastedInputObject<CIO_float, float> ("Rectification Scale Factor", 1.f );

    /// Upper bound used for pixels without valid bounds (truncated to
    /// int). It does not depend on the pixel, so it is computed once.
    const int defaultMaxDisp_i = m_maxDisparity_i*scaleFactor_f;

    S2D<int> halfMaskSize ( m_zssdKernelWidth_ui / 2 , m_zssdKernelHeight_ui / 2);
    
    //#undef _OPENMP
#if defined ( _OPENMP )

    /// One solver and one set of vectors per thread.
    CPyramidalDynProg   dynProgOpVectors [m_numThreads_ci];
    std::vector<int>    resVectors       [m_numThreads_ci];
    std::vector<int>    predVectors      [m_numThreads_ci];
    std::vector<int>    pathTolVectors   [m_numThreads_ci];

    for (int i = 0; i < m_numThreads_ci; ++i) 
    {
        dynProgOpVectors[i].setPyramidParams( dsi.dispRange_ui, dsi.width_ui, m_maxLevel_ui );

        dynProgOpVectors[i].setExpectedGradient    ( NULL );

        dynProgOpVectors[i].setDistanceCost        ( m_distCost_f );
        dynProgOpVectors[i].setDistanceTh          ( m_distThreshold_f );
        
        dynProgOpVectors[i].setPredictionCost      ( m_predCost_f );
        dynProgOpVectors[i].setPredictionTh        ( m_predThreshold_f );
        
        dynProgOpVectors[i].setInitialCost         ( m_initialCost_f );
        dynProgOpVectors[i].setMinCostValue        ( m_minCostValue_f );
        dynProgOpVectors[i].setMaxCostValue        ( m_maxCostValue_f );
        
        dynProgOpVectors[i].setApplyMedianFilter   ( m_applyMedian_b );
        dynProgOpVectors[i].setMedFiltKernelSize   ( m_mfKernelSize_ui );
        
        dynProgOpVectors[i].setFollowPathTolerance ( m_pathTolerance_ui );
        
        resVectors[i].resize     ( dsi.width_ui, 0 );
        predVectors[i].resize    ( dsi.width_ui, 0 );
        pathTolVectors[i].resize ( dsi.width_ui, 0 );
    }
#pragma omp parallel for num_threads(m_numThreads_ci) schedule(dynamic)
#else
    //CDynamicProgrammingOp dynProgOp ( dsi.dispRange_ui, dsi.width_ui );
    /// Single solver reused across calls.
    static CPyramidalDynProg       dynProgOp;

    dynProgOp.setPyramidParams( dsi.dispRange_ui, dsi.width_ui, m_maxLevel_ui );

    dynProgOp.setExpectedGradient    ( NULL );

    dynProgOp.setDistanceCost        ( m_distCost_f );
    dynProgOp.setDistanceTh          ( m_distThreshold_f );

    dynProgOp.setPredictionCost      ( m_predCost_f );
    dynProgOp.setPredictionTh        ( m_predThreshold_f );

    dynProgOp.setInitialCost         ( m_initialCost_f );
    dynProgOp.setMinCostValue        ( m_minCostValue_f );
    dynProgOp.setMaxCostValue        ( m_maxCostValue_f );

    dynProgOp.setApplyMedianFilter   ( m_applyMedian_b );
    dynProgOp.setMedFiltKernelSize   ( m_mfKernelSize_ui );

    dynProgOp.setFollowPathTolerance ( m_pathTolerance_ui );

    std::vector<int> resVec  ( dsi.width_ui, 0 );
    std::vector<int> predVec ( dsi.width_ui, 0 );
    std::vector<int> pathTol ( dsi.width_ui, 0 );

#endif
    for (int i = halfMaskSize.y*2; i < h_i-halfMaskSize.y*2; ++i)
    {
#if defined ( _OPENMP )
        const unsigned int threadNum_ui = omp_get_thread_num();
        std::vector<int> &resVec        = resVectors[threadNum_ui];
        std::vector<int> &predVec       = predVectors[threadNum_ui];
        std::vector<int> &pathTol       = pathTolVectors[threadNum_ui];
        CPyramidalDynProg  &dynProgOp   = dynProgOpVectors[threadNum_ui];
#endif
        
        if (predDispU_p && m_useGradientPred_b)
            dynProgOp.setExpectedGradient    ( predDispU_p->getScanline(i) );

        const float * const minD_p = minDispImg_p  -> getScanline ( i );
        const float * const maxD_p = maxDispImg_p  -> getScanline ( i );

        /// Stays NULL when no predicted image is available. It is only
        /// indexed after a null check, so no pointer arithmetic is ever
        /// performed on a null pointer.
        const float * pred_p = NULL;

        if ( predDispImg_p )
            pred_p = predDispImg_p -> getScanline ( i );
        
        for (int j = 0; j < w_i; ++j)
        {
            int maxDisp_i = defaultMaxDisp_i;
            int minDisp_i = 0;

            /// The bounds are only considered valid when the maximum is
            /// at least 1 and the minimum is strictly positive.
            const bool bounded_b = !( maxD_p[j] < 1. || minD_p[j] <= 0.f );

            if ( bounded_b )
            {
                /// The maximum is additionally limited by the column
                /// index minus the half kernel width, then rounded.
                maxDisp_i = (int)(std::min ( maxD_p[j], (float)j-halfMaskSize.x ) + .5);
                minDisp_i = std::max ( minD_p[j], 0.f );
            }

            /// A prediction is only accepted when it lies within the
            /// valid bounds; -1 marks "no prediction".
            if ( pred_p && bounded_b )
            {
                const float pred_f = pred_p[j];

                if ( pred_f < minDisp_i ||
                     pred_f > maxDisp_i )
                {
                    //printf("pred %f not within %i and %i\n", pred_f, minDisp_i, maxDisp_i );
                    predVec[j] = -1;
                }
                else
                {
                    predVec[j] = (int)(pred_f) - dsi.minDisp_i;
                }
            }
            else
            {
                predVec[j] = -1;
            }

            /// Half the width of the allowed interval plus one, but
            /// never less than 1.
            pathTol[j] = std::max( (maxDisp_i - minDisp_i)/2 + 1, 1 );

            //printf("for row %i col %i tolerance is %i (*minD = %f *maxD = %f) minDisp_i - %i maxDisp_i = %i\n", 
            //       i, j, pathTol[j],
            //       minD_p[j], maxD_p[j], minDisp_i, maxDisp_i );

            resVec[j]  = predVec[j];
        }

        /// Wrap the DSI slice of this row as a cost image
        /// (width = disparity range, height = image width).
        CFloatImage img;
        img.setWidth  ( dsi.dispRange_ui );
        img.setHeight ( dsi.width_ui );
        img.setData   ( dsi.getDispColumnSlice(i) );

        /// Compute dynamic programming.
        dynProgOp.compute ( img, resVec, predVec, pathTol );
 
        /// Store the disparities; negative values and disparities
        /// larger than the column index are set to 0.
        float *  result_p = m_dispImgPyrDPwithBounds.getScanline(i);
        for ( int j = 0 ; j < w_i ; ++j, ++result_p )
        {        
            *result_p = resVec[j] + dsi.minDisp_i;
            if ( *result_p < 0 ||
                 j - *result_p < 0 ) *result_p = 0;
        }

        if (m_updateOptDsi_b)
        {  
            CDynamicProgrammingOp::CNodeImage graphImg = dynProgOp.getDynProgObj(0)->getGraphImage();
            updateOptDsi ( i, graphImg );
        }
    }

    if ( m_refineDispMap_b )
    {
        refineDispMap ( m_dispImgPyrDPwithBounds, m_dsi );
    }

    postProcess ( m_dispImgPyrDPwithBounds );
}

void
CKmStereoOp::computeDPwithBounds( CFloatImage         &fr_dispImg,
                                  const CDsiOpFloat_t &f_dsi,
                                  const std::string    f_idMinDisp_str,
                                  const std::string    f_idMaxDisp_str,
                                  const std::string    f_idDispPred_str,
                                  const std::string    f_idFirstDisp_str )
                                  
                                  
                                  
{
    const int w_i      = fr_dispImg.getWidth();
    const int h_i      = fr_dispImg.getHeight();
    
    CDsiOpFloat_t::SDispSpaceImage dsi = f_dsi.getDisparitySpaceImage( );    

    const CFloatImage * minDispImg_p = dynamic_cast<CFloatImage *>(getInput ( f_idMinDisp_str ) );
    const CFloatImage * maxDispImg_p = dynamic_cast<CFloatImage *>(getInput ( f_idMaxDisp_str ) );
    //printf("registered input for max disp img is %p\n", maxDispImg_p );

    CFloatImage * predDispImg_p = dynamic_cast<CFloatImage *>(getInput ( f_idDispPred_str ) );
   
    CFloatImage * predDispU_p   = dynamic_cast<CFloatImage *>(getInput ( f_idFirstDisp_str ) );

    if ( !minDispImg_p || !minDispImg_p )
        return;
    
    float scaleFactor_f = getCastedInputObject<CIO_float, float> ("Rectification Scale Factor", 1.f );

    S2D<int> halfMaskSize ( m_zssdKernelWidth_ui / 2 , m_zssdKernelHeight_ui / 2);
    
#if defined ( _OPENMP )

    CDynamicProgrammingOp dynProgOpVectors[m_numThreads_ci];
    std::vector<int>    resVectors       [m_numThreads_ci];
    std::vector<int>    predVectors      [m_numThreads_ci];
    std::vector<int>    pathTolVectors   [m_numThreads_ci];

    for (int i = 0; i < m_numThreads_ci; ++i) 
    {
        dynProgOpVectors[i].setCostImageSize( dsi.dispRange_ui, dsi.width_ui );

        /// Expected gradient of the solution.
        dynProgOpVectors[i].setExpectedGradient    ( NULL );

        dynProgOpVectors[i].setDistanceCost        ( m_distCost_f );
        dynProgOpVectors[i].setDistanceTh          ( m_distThreshold_f );
        
        dynProgOpVectors[i].setPredictionCost      ( m_predCost_f );
        dynProgOpVectors[i].setPredictionTh        ( m_predThreshold_f );
        
        dynProgOpVectors[i].setInitialCost         ( m_initialCost_f );
        dynProgOpVectors[i].setMinCostValue        ( m_minCostValue_f );
        dynProgOpVectors[i].setMaxCostValue        ( m_maxCostValue_f );
        
        dynProgOpVectors[i].setApplyMedianFilter   ( m_applyMedian_b );
        dynProgOpVectors[i].setMedFiltKernelSize   ( m_mfKernelSize_ui );
        
        dynProgOpVectors[i].setFollowPathTolerance ( m_pathTolerance_ui );
        
        resVectors[i].resize     ( dsi.width_ui, 0 );
        predVectors[i].resize    ( dsi.width_ui, 0 );
        pathTolVectors[i].resize ( dsi.width_ui, 0 );
    }
#pragma omp parallel for num_threads(m_numThreads_ci) schedule(static)
#else
    static CDynamicProgrammingOp dynProgOp;

    dynProgOp.setCostImageSize( dsi.dispRange_ui, dsi.width_ui );

    /// Expected gradient of the solution.
    dynProgOp.setExpectedGradient    ( NULL );

    dynProgOp.setDistanceCost        ( m_distCost_f );
    dynProgOp.setDistanceTh          ( m_distThreshold_f );

    dynProgOp.setPredictionCost      ( m_predCost_f );
    dynProgOp.setPredictionTh        ( m_predThreshold_f );

    dynProgOp.setInitialCost         ( m_initialCost_f );
    dynProgOp.setMinCostValue        ( m_minCostValue_f );
    dynProgOp.setMaxCostValue        ( m_maxCostValue_f );

    dynProgOp.setApplyMedianFilter   ( m_applyMedian_b );
    dynProgOp.setMedFiltKernelSize   ( m_mfKernelSize_ui );

    dynProgOp.setFollowPathTolerance ( m_pathTolerance_ui );

    std::vector<int> resVec  ( dsi.width_ui, 0 );
    std::vector<int> predVec ( dsi.width_ui, 0 );
    std::vector<int> pathTol ( dsi.width_ui, 0 );

#endif
    for (int i = halfMaskSize.y*2; i < h_i-halfMaskSize.y*2; ++i)
    {
#if defined ( _OPENMP )
        const unsigned int threadNum_ui  = omp_get_thread_num();
        std::vector<int> &resVec         = resVectors[threadNum_ui];
        std::vector<int> &predVec        = predVectors[threadNum_ui];
        std::vector<int> &pathTol        = pathTolVectors[threadNum_ui];
        CDynamicProgrammingOp &dynProgOp = dynProgOpVectors[threadNum_ui];
#endif

        if (predDispU_p && m_useGradientPred_b)
            dynProgOp.setExpectedGradient    ( predDispU_p->getScanline(i) );

        float *minD_p, *maxD_p, *pred_p = NULL;

        minD_p = minDispImg_p->getScanline(i);
        maxD_p = maxDispImg_p->getScanline(i);
            
        if ( predDispImg_p )
            pred_p = predDispImg_p -> getScanline ( i );
        
        for (int j = 0; j < w_i; ++j, ++minD_p, ++maxD_p, ++pred_p)
        {
            int maxDisp_i = m_maxDisparity_i*scaleFactor_f;
            int minDisp_i = 0;

            //printf("for (%i %i) minDisp = %f maxDisp = %f ",
            //           i, j, *minD_p, *maxD_p );

            if ( //*minD_p > j-halfMaskSize.x ||
                    //*maxD_p < j-halfMaskSize.x ||
                    *maxD_p < 1. ||
                    *minD_p <= 0.f )
            {
            }
            else
            {   
                maxDisp_i = (int)(std::min ( *maxD_p, (float)j-halfMaskSize.x ) + .5);
                minDisp_i = std::max ( *minD_p, 0.f );
            }

            if ( predDispImg_p )
            {
                if ( *pred_p <= 0 )
                {
                    predVec[j] = (int)((minDisp_i + maxDisp_i)/2 + .5) - dsi.minDisp_i;
                    resVec[j] = -1;
                }
                else
                {
                    if ( *pred_p < minDisp_i ) minDisp_i = *pred_p - 1;
                    if ( *pred_p > maxDisp_i ) maxDisp_i = *pred_p + 1.5f;

                    predVec[j] = (int)(*pred_p) - dsi.minDisp_i;
                    resVec[j]  = predVec[j];
                }
            }
            else
            {
                predVec[j] = (int)((minDisp_i + maxDisp_i)/2 + .5) - dsi.minDisp_i;
                resVec[j] = -1;                    
            }
            
            pathTol[j] = (int)((maxDisp_i - minDisp_i)/2 + .5)+m_deltaDisp_ui;

            //printf("for row %i col %i pred: %i (used %i) tolerance is %i (*minD = %f *maxD = %f) minDisp_i - %i maxDisp_i = %i\n", 
            //       i, j, predVec[j], resVec[j], pathTol[j],
            //       *minD_p, *maxD_p, minDisp_i, maxDisp_i );

            pathTol[j] = std::max( pathTol[j], 2 );
        }

        /// Dynamic programming comes here.
        CFloatImage img;
        img.setWidth  ( dsi.dispRange_ui );
        img.setHeight ( dsi.width_ui );
        img.setData   ( dsi.getDispColumnSlice(i) );

        /// Compute dynamic programming.
        //dynProgOp.compute ( img, resVec, predVec, pathTol );
        dynProgOp.compute ( img, resVec, predVec, pathTol );
 
        float *  result_p = fr_dispImg.getScanline(i);
        for ( int j = 0 ; j < w_i ; ++j, ++result_p )
        {        
            *result_p = resVec[j] + dsi.minDisp_i;
            if ( *result_p < 0 ||
                 j - *result_p < 0 ) *result_p = 0;

            //printf("result for row %i col %i is %i\n", 
            //       i, j, resVec[j] );
        }

        if (m_updateOptDsi_b)
        {  
            CDynamicProgrammingOp::CNodeImage graphImg = dynProgOp.getGraphImage();
            updateOptDsi ( i, graphImg );
        }
    }

    if ( m_refineDispMap_b )
    {
        refineDispMap ( fr_dispImg, f_dsi );
    }

    postProcess ( fr_dispImg );
}

void
CKmStereoOp::computeWTAwithBounds()
{
    const int w_i      = m_dispImgWTAwithBounds.getWidth();
    const int h_i      = m_dispImgWTAwithBounds.getHeight();

    float scaleFactor_f = getCastedInputObject<CIO_float, float> ("Rectification Scale Factor", 1.f );

    S2D<int> halfMaskSize ( m_zssdKernelWidth_ui / 2 , m_zssdKernelHeight_ui / 2);
    
    CFloatImage * minDispImg_p = dynamic_cast<CFloatImage *>(getInput ( "Predicted Min Disp Image") );
    CFloatImage * maxDispImg_p = dynamic_cast<CFloatImage *>(getInput ( "Predicted Max Disp Image") );

    bool constraintSearch_b = true && minDispImg_p && maxDispImg_p;

#if defined ( _OPENMP )
#pragma omp parallel for num_threads(m_numThreads_ci) schedule(static)
#endif
    for (int i = halfMaskSize.y; i < h_i-halfMaskSize.y; ++i)
    {
        float *minD_p = NULL, *maxD_p = NULL;

        float * result_p = m_dispImgWTAwithBounds.getScanline(i);

        if (constraintSearch_b)
        {
            minD_p = minDispImg_p->getScanline(i) + halfMaskSize.x;
            maxD_p = maxDispImg_p->getScanline(i) + halfMaskSize.x;
        }

        for (int j =  halfMaskSize.x; j < w_i- halfMaskSize.x; ++j, ++minD_p, ++maxD_p, ++result_p)
        {
            float minScore_f = 0;
            int   bestDisp_i = 0;

            int minDisp_i = std::max(-1, -j+halfMaskSize.x);
            int maxDisp_i = m_maxDisparity_i*scaleFactor_f;
            
            if (constraintSearch_b && *minD_p > 0 && *maxD_p > *minD_p)
            {
                minDisp_i = std::max(int(*minD_p-1), minDisp_i );
                maxDisp_i = std::min(int(*maxD_p+.5+1), maxDisp_i );
            }
            else
            {
                minDisp_i = -1;
            }

            maxDisp_i = std::min(maxDisp_i, j-halfMaskSize.x);
            
            for ( int d = minDisp_i; d <= maxDisp_i; ++d )
            {
                float  score_f = *(m_dsi.getDisparitySpaceImage().getPtrAtRowColumnDisp( i, j, d ) ); //!111

                if ( minScore_f > score_f || 
                     d == minDisp_i )
                {
                    minScore_f = score_f;
                    bestDisp_i = d;
                }    
            }

            *result_p = bestDisp_i;

            if ( *result_p < 0 ||
                 j - *result_p < 0 ) *result_p = 0;
            
            //printf("for row %i col %i pred: %f *minD = %f *maxD = %f\n", 
            //       i, j, *minD_p, *maxD_p );

        }
    }

    if ( m_refineDispMap_b )
    {
        refineDispMap ( m_dispImgWTAwithBounds, m_dsi );
    }

    postProcess ( m_dispImgWTAwithBounds );
}


void CKmStereoOp::computeWTA()
{
    const int w_i      = m_dispImgWTA.getWidth();
    const int h_i      = m_dispImgWTA.getHeight();

    float scaleFactor_f = getCastedInputObject<CIO_float, float> ("Rectification Scale Factor", 1.f );

    S2D<int> halfMaskSize ( m_zssdKernelWidth_ui / 2 , m_zssdKernelHeight_ui / 2);    
    
#if defined ( _OPENMP )
#pragma omp parallel for num_threads(m_numThreads_ci) schedule(static)
#endif
    for (int i = halfMaskSize.y; i < h_i-halfMaskSize.y; ++i)
    {
        float *  result_p = m_dispImgWTA.getScanline(i) +  halfMaskSize.x;

        for (int j = halfMaskSize.x*2; j < w_i-halfMaskSize.x; ++j, ++result_p)
        {
            float minScore_f = 0;
            int   bestDisp_i = 0;

            int minDisp_i = -1;
            int maxDisp_i = m_maxDisparity_i*scaleFactor_f;

            maxDisp_i = std::min(maxDisp_i, j-halfMaskSize.x);
            
            for ( int d = minDisp_i; d <= maxDisp_i; ++d )
            {
                float  score_f = *(m_dsi.getDisparitySpaceImage().getPtrAtRowColumnDisp( i, j, d ) ); //!111

                //printf("score for  (%i %i, %i) is %f (best score %f at disp %i)\n",
                //       i, j, d, score_f, minScore_f, minDisp_i );

                if ( minScore_f > score_f || 
                     d == minDisp_i )
                {
                    minScore_f = score_f;
                    bestDisp_i = d;
                }    
            }
            //printf("%i %i best disp is %i\n", i, j, bestDisp_i);
            //printf("%i %i bestDisp is %i with score %f\n", i, j, bestDisp_i, minScore_f);
            *result_p = bestDisp_i;

            if ( *result_p < 0 ||
                 j - *result_p < 0 ) *result_p = 0;
        }
    }

    
    if ( m_refineDispMap_b )
    {
        refineDispMap ( m_dispImgWTA, m_dsi );
    }

    postProcess ( m_dispImgWTA );
}


/// Show event.
bool CKmStereoOp::show()
{

#if 0
    const float alpha_f = 0.3;
    float rot[3][3] = { {1., 0.f, 0.f}, 
                        {0.f, cos(alpha_f), -sin(alpha_f)},
                        {0.f, sin(alpha_f), cos(alpha_f)} };
    float trans[3] = { 0.0, 0.0, 0.0 };
#else
    float rot[3][3] = { {0.9935683662767510, -0.1119083819746910, 0.0172747092004820 },            
                        {0.1131572654828300, 0.9756647791024320, -0.1878128645419600 },
                        {0.0041635084487756, 0.1885596798438500, 0.9820529070954280} };
    float trans[3] = {  0.6775398573062020, -0.2608627806075020, 0.3484528762820530 };
#endif
    CDrawingList *  list_p;

    float scaleFactor_f = getCastedInputObject<CIO_float, float> ("Rectification Scale Factor", 1.f );

    list_p = getDrawingList ("KM Stereo Disparity Image");
    list_p -> clear();
    list_p -> addImage ( m_leftRes, 0, 0, 800, 600, 1.f/(m_maxDisparity_i*scaleFactor_f) );
    

    float scale_f = 1./(m_maxDisparity_i*scaleFactor_f);
    
    list_p = getDrawingList ("WTA Raw");
    list_p -> clear();
    list_p -> addImage ( m_dispImgWTA, 0, 0, 800, 600, scale_f );

    list_p = getDrawingList ("WTA w/ Bounds");
    list_p -> clear();
    list_p -> addImage ( m_dispImgWTAwithBounds, 0, 0, 800, 600, scale_f );


    list_p = getDrawingList ("Dynamic Programming");
    list_p -> clear();
    list_p -> addImage ( m_dispImgDP, 0, 0, 800, 600,  scale_f );

    list_p = getDrawingList ("Dynamic Programming with Pred");
    list_p -> clear();
    list_p -> addImage ( m_dispImgDPwithPred, 0, 0, 800, 600,  scale_f );

    list_p = getDrawingList ("Vertical Dynamic Programming");
    list_p -> clear();
    list_p -> addImage ( m_dispImgVDP, 0, 0, 800, 600,  scale_f );

    list_p = getDrawingList ("Dynamic Programming w/ Bounds");
    list_p -> clear();
    list_p -> addImage ( m_dispImgDPwithBounds, 0, 0, 800, 600,  scale_f );


    list_p = getDrawingList ("Pyramidal Dynamic Programming");
    list_p -> clear();
    list_p -> addImage ( m_dispImgPyrDP, 0, 0, 800, 600,  scale_f );

    list_p = getDrawingList ("Pyramidal Dynamic Programming w/ Bounds");
    list_p -> clear();
    list_p -> addImage ( m_dispImgPyrDPwithBounds, 0, 0, 800, 600,  scale_f );

    list_p = getDrawingList ("Residuum");
    list_p -> clear();
    list_p -> addImage ( m_dispImgResiduum, 0, 0, 800, 600, 1./m_displayResiduumNorm_f, 0.5 );

    ///////////////////////

    CStereoCamera * cam_p = dynamic_cast<CStereoCamera *>(getInput ( "Rectified Camera" ) );

    if (!cam_p) return false;

    const double scaleX_d = 10.f;
    const double scaleZ_d = 10.f;
    
    double x, y, z, u_d, v_d;
        
    SRgb  colorLine;
    CColorEncoding colorEnc ( CColorEncoding::CET_BLUE2GREEN2RED );   

    list_p = getDrawingList ("Stereo Bird View");
    list_p -> clear();

    int width_i  = m_leftRes.getWidth();
    int height_i = m_leftRes.getHeight();
 
    int dispWidth_i  = 800;
    int dispHeight_i = 600;

    float *disps_p = (float*) m_leftRes.getData();
    
    disps_p += height_i * width_i - 1;

    for (int i = height_i-1; i >= 0 ; --i)
    {
        for (int j = width_i-1; j >= 0; --j, --disps_p)
        {
            if (*disps_p > 0.5)
            {
                cam_p -> image2World ( j, i, *disps_p, x, y, z );
                
                if (1)
                {
                    x -= trans[0];
                    z -= trans[1];
                    y -= trans[2];
                    
                    double newX_d = x * rot[0][0] + z * rot[1][0] + y * rot[2][0];
                    double newZ_d = x * rot[0][1] + z * rot[1][1] + y * rot[2][1];
                    double newY_d = x * rot[0][2] + z * rot[1][2] + y * rot[2][2];
                    
                    x = newX_d;
                    y = newY_d;
                    z = newZ_d;
                }

                if (y > -2.2)
                {
                    colorEnc.colorFromValue ( y, 
                                              -2.25,
                                              -1.5,
                                              colorLine );
                    
                    u_d = x * scaleX_d + dispWidth_i/2;
                    v_d = dispHeight_i/2 - z * scaleZ_d;
                    
                    if ( u_d >= 0 && u_d < dispWidth_i &&
                         v_d >= 0 && v_d < dispHeight_i )
                    {
                        list_p -> setLineColor ( SRgba( colorLine, 255 ) );
                        list_p -> setFillColor ( SRgba( colorLine, 20) );
                        list_p -> addFilledRectangle ( u_d-.5, v_d-.5, u_d + .5, v_d + .5);
                    }
                }
            }
        }   
    }

    show3D();

    return COperator::show();
}

void CKmStereoOp::show3D()
{
    const CStereoCamera * camera_p  = 
        dynamic_cast<CStereoCamera *>(getInput ( "Rectified Camera" ) );
    CUShortImage *  leftImg_p = dynamic_cast<CUShortImage *>(getInput ( "Rectified Left Image" ) );

    if ( !camera_p || !leftImg_p)
        return;

    CFloatImage * dispImg_p = NULL;

    if (getDrawingList ("WTA Raw")->isVisible())
        dispImg_p  = &m_dispImgWTA;
    else if (getDrawingList ("WTA w/ Bounds") ->isVisible() )
        dispImg_p  =  &m_dispImgWTAwithBounds;
    else if (getDrawingList ("Dynamic Programming") ->isVisible() )
        dispImg_p  = &m_dispImgDP;
    else if (getDrawingList ("Dynamic Programming with Pred") ->isVisible() )
        dispImg_p  = &m_dispImgDPwithPred;
    else if (getDrawingList ("Vertical Dynamic Programming") ->isVisible() )
        dispImg_p  = &m_dispImgVDP;
    else if (getDrawingList ("Dynamic Programming w/ Bounds") ->isVisible() )
        dispImg_p  = &m_dispImgDPwithBounds;
    else if (getDrawingList ("Pyramidal Dynamic Programming") ->isVisible() )
        dispImg_p  = &m_dispImgPyrDP;
    else if (getDrawingList ("Pyramidal Dynamic Programming w/ Bounds") ->isVisible() )
        dispImg_p  = &m_dispImgPyrDPwithBounds;
    

    if (m_show3DPoints_b)
    {
        if (dispImg_p)
        {
            int w_i = dispImg_p->getWidth();
            int h_i = dispImg_p->getHeight();
            
            for (int i = 0; i < h_i; ++i)
            {
                float *     disp_p = dispImg_p->getScanline(i);
                uint16_t *  limg_p = leftImg_p->getScanline(i);
                
                for ( int j = 0; j < w_i; ++j, ++disp_p, ++limg_p )
                {
                    if (*disp_p > 0  )
                    {
                        C3DVector point;
                        camera_p -> image2World ( j, i, *disp_p, point );
                        SRgb color(*limg_p/256, *limg_p/256, *limg_p/256 );
                        m_3dViewer_p -> addPoint ( point, color, C3DVector(0,0,0) );                    
                    }
                }
            }
        }
    }

    if (m_show3DMesh_b)
    {
        m_vectorImg.clear();
        if (dispImg_p)
        {
            if (m_vectorImg.getSize()!=dispImg_p->getSize())
            {
                m_vectorImg.freeMemory();
                m_vectorImg.setSize(dispImg_p->getSize());
                m_vectorImg.ensureAllocation();
            }
            
            int w_i = dispImg_p->getWidth();
            int h_i = dispImg_p->getHeight();

            for (int i = 0; i < h_i; ++i)
            {
                float *     disp_p = dispImg_p->getScanline(i);
                uint16_t *  limg_p = leftImg_p->getScanline(i);
                C3DVector * vimg_p = m_vectorImg.getScanline(i);
                
                for ( int j = 0; j < w_i; ++j, ++disp_p, ++limg_p, ++vimg_p )
                {
                    if (*disp_p > 0  )
                    {
                        camera_p -> image2World ( j, i, *disp_p, *vimg_p );
                    }
                }
            }

            const float k_f = camera_p -> getBaseline() * camera_p -> getFu();
            m_3dViewer_p -> add3DVectorImg4Mesh ( &m_vectorImg,
                                                  leftImg_p,
                                                  m_maxDist4Mesh_f,
                                                  m_maxDisp4Mesh_f/k_f );
        }
    }
}


/// Init event.
bool CKmStereoOp::initialize()
{
    reallocDsi();

    CUShortImage * imgL_p = dynamic_cast<CUShortImage *>(getInput ( "Rectified Left Image" ) );

    if (!imgL_p)
        return false;
    
    m_stereoOp.setPyramidParams ( imgL_p->getWidth(),
                                  imgL_p->getHeight(),
                                  m_stereoOp.getLevels() );

    //m_stereoOp.setDisparityRange ( S2D<int>( 0, 100 ) );

    S2D<unsigned int> imgSize = imgL_p -> getSize();
    
    m_dispImgWTA.setSize ( imgSize );
    m_dispImgWTA.ensureAllocation ( );

    m_dispImgWTAwithBounds.setSize ( imgSize );
    m_dispImgWTAwithBounds.ensureAllocation ( );

    m_dispImgDP.setSize ( imgSize );
    m_dispImgDP.ensureAllocation ( );

    m_dispImgDPwithPred.setSize ( imgSize );
    m_dispImgDPwithPred.ensureAllocation ( );

    m_dispImgVDP.setSize ( imgSize );
    m_dispImgVDP.ensureAllocation ( );

    m_dispImgDPwithBounds.setSize ( imgSize );
    m_dispImgDPwithBounds.ensureAllocation ( );

    m_dispImgPyrDP.setSize ( imgSize );
    m_dispImgPyrDP.ensureAllocation ( );

    m_dispImgPyrDPwithBounds.setSize ( imgSize );
    m_dispImgPyrDPwithBounds.ensureAllocation ( );

    m_auxDispMap.setSize ( imgSize );
    m_auxDispMap.ensureAllocation ( );

    return COperator::initialize();
}

/// Init event.
void CKmStereoOp::reallocDsi()
{
    printf("reallocating dsi\n");
    
    CUShortImage * imgL_p = dynamic_cast<CUShortImage *>(getInput ( "Rectified Left Image" ) );
    float scaleFactor_f = getCastedInputObject<CIO_float, float> ("Rectification Scale Factor", 1.f );

    if (!imgL_p) return;
    
    m_dsi.setImageSizes ( imgL_p -> getWidth(), 
                          imgL_p -> getHeight(),
                          -((int)m_zssdKernelWidth_ui)/2-1, 
                          (m_maxDisparity_i * scaleFactor_f) + 
                          m_zssdKernelWidth_ui/2+1 );

    m_optDsi.setImageSizes ( imgL_p -> getWidth(), 
                             imgL_p -> getHeight(),
                             -((int)m_zssdKernelWidth_ui)/2-1, 
                             (m_maxDisparity_i * scaleFactor_f) + 
                             m_zssdKernelWidth_ui/2+1 );

    m_rightDsi.setImageSizes ( imgL_p -> getWidth(), 
                               imgL_p -> getHeight(),
                               -((int)m_zssdKernelWidth_ui)/2-1, 
                               (m_maxDisparity_i * scaleFactor_f) + 
                               m_zssdKernelWidth_ui/2+1 );

    m_dsi.clear();
    m_optDsi.clear();
    m_rightDsi.clear();
    
    CDsiOpFloat_t::SDispSpaceImage dsi = m_dsi.getDisparitySpaceImage( ); 


    printf("new min disp %i, new max disp %i\n",
           dsi.minDisp_i, dsi.maxDisp_i );
}

/// Reset event.
///
/// Logs the call and forwards it to the base operator.
/// \return The result of COperator::reset().
bool CKmStereoOp::reset()
{
    logger::msg("Reset called");

    const bool baseResult_b = COperator::reset();
    return baseResult_b;
}

/// Exit event.
///
/// No operator-specific work is done here; the call is forwarded to the base
/// operator.
/// \return The result of COperator::exit().
bool CKmStereoOp::exit()
{
    const bool baseResult_b = COperator::exit();
    return baseResult_b;
}

/// Key pressed event.
///
/// No key is handled by this operator itself; the event is passed on to the
/// base operator unchanged.
/// \param f_event_p  Key event received from the framework.
void 
CKmStereoOp::keyPressed ( CKeyEvent * f_event_p )
{
    COperator::keyPressed ( f_event_p );
}



/// Mouse moved.
///
/// Handles two interactions and then forwards the event to the base operator:
///
///  - On the screen of the "DSI Slice" list (when visible):
///      * Shift + left button: the disparity/column slice of the row under the
///        cursor (from m_optDsi if m_showOptDsi_b is set, else from m_dsi) is
///        shown in the "DSI Slice" list and the row is marked in the
///        "DSI Slice Row Overlay" list.
///      * Ctrl: the value of the last selected slice under the cursor is
///        printed.
///  - On the screen of the "Residuum" list (when visible): the DP-with-bounds
///    result, the predicted disparity (0 if unavailable) and the residuum at
///    the cursor position are printed.
///
/// All row/column indices derived from the mouse position are validated
/// against the size of the image they address before any pixel is read;
/// positions outside of an image are silently ignored.
///
/// \param f_event_p  Mouse event received from the framework.
void CKmStereoOp::mouseMoved ( CMouseEvent * f_event_p )
{
    CDrawingList *  list_p =  getDrawingList ("DSI Slice");

    const float scaleFactor_f = getCastedInputObject<CIO_float, float> ("Rectification Scale Factor", 1.f );
    const int row_i = f_event_p -> posInScreen.y * scaleFactor_f;
    const int col_i = f_event_p -> posInScreen.x * scaleFactor_f;

    if ( f_event_p -> displayScreen == list_p->getPosition() &&
         list_p -> isVisible() )
    { 
        /// Static so that the Ctrl query can inspect the slice selected by an
        /// earlier Shift + left button event.
        static CFloatImage img;

        if ((f_event_p -> qtMouseEvent_p -> buttons() & Qt::LeftButton) != 0 && 
            (f_event_p -> qtMouseEvent_p -> modifiers() & Qt::ShiftModifier ) != 0)
        {
            CDsiOpFloat_t::SDispSpaceImage dsi;

            if ( m_showOptDsi_b )
                dsi = m_optDsi.getDisparitySpaceImage( );
            else
                dsi = m_dsi.getDisparitySpaceImage( );

            /// Only rows that exist in the DSI can be sliced.
            if ( row_i >= 0 && row_i < static_cast<int>(dsi.height_ui) )
            {
                img.setWidth  ( dsi.dispRange_ui );
                img.setHeight ( dsi.width_ui );
                img.setData   ( (float *) dsi.getDispColumnSlice(row_i) );
            
                list_p -> clear();
                list_p -> addImage ( img, 0, 0, 800, 600, 
                                     m_dsiSliceScale_d, 
                                     m_dsiSliceOffset_d,
                                     1.f, true );

                list_p = getDrawingList ("DSI Slice Row Overlay");
                list_p -> clear();
                list_p -> setLineWidth ( 2 );
            
                list_p -> setLineColor(SRgb(0,255,0));
                list_p -> addRectangle ( 0, (row_i-1)/scaleFactor_f, 
                                         800, 
                                         (row_i+1)/scaleFactor_f );
                updateDisplay();
            }
        }
        else if ((f_event_p -> qtMouseEvent_p -> modifiers() & Qt::ControlModifier ) != 0 &&
                 img.getWidth() > 0 )
        {
            /// In the slice image the x axis is the disparity and the y axis
            /// the image column.
            const int disp_i     = f_event_p -> posInScreen.x/800. * img.getWidth();
            const int sliceCol_i = f_event_p -> posInScreen.y/600. * img.getHeight();

            if ( disp_i     >= 0 && disp_i     < static_cast<int>(img.getWidth())  &&
                 sliceCol_i >= 0 && sliceCol_i < static_cast<int>(img.getHeight()) )
            {
                printf("DSI (r:%i, c:%i, dL%i) = %f\n",
                       row_i, sliceCol_i, disp_i, 
                       img.getScanline(sliceCol_i)[disp_i] );
            }
        }
    }

    list_p =  getDrawingList ("Residuum");

    if ( f_event_p -> displayScreen == list_p->getPosition() &&
         list_p -> isVisible() )
    {
        CFloatImage * predDispImg_p = dynamic_cast<CFloatImage *>(getInput ( "Predicted Disparity Image") );

        const bool nonNegative_b = ( row_i >= 0 && col_i >= 0 );

        const bool inResult_b = 
            nonNegative_b &&
            row_i < static_cast<int>(m_dispImgDPwithBounds.getHeight()) &&
            col_i < static_cast<int>(m_dispImgDPwithBounds.getWidth());

        const bool inResiduum_b = 
            nonNegative_b &&
            row_i < static_cast<int>(m_dispImgResiduum.getHeight()) &&
            col_i < static_cast<int>(m_dispImgResiduum.getWidth());

        if ( inResult_b && inResiduum_b )
        {
            /// The prediction is reported as 0 when it is missing or when the
            /// cursor lies outside of the prediction image.
            float pred_f = 0.f;

            if ( predDispImg_p &&
                 row_i < static_cast<int>(predDispImg_p->getHeight()) &&
                 col_i < static_cast<int>(predDispImg_p->getWidth()) )
            {
                pred_f = predDispImg_p->getScanline(row_i)[col_i];
            }

            printf("%i %i result: %f prediction: %f error: %f\n",
                   row_i, 
                   col_i,
                   m_dispImgDPwithBounds.getScanline(row_i)[col_i],
                   pred_f,
                   m_dispImgResiduum.getScanline(row_i)[col_i] );
        }
    }

    COperator::mouseMoved ( f_event_p );
}

/// Copies the accumulated dynamic-programming costs of one image row into the
/// optimized DSI (m_optDsi) and rescales them per column.
///
/// For every column j the accumulated cost m_accCost_f of each node in
/// scanline j of the graph image is stored for all disparities of the DSI.
/// The stored values are then mapped linearly so that the smallest positive
/// cost becomes 0 and the largest positive cost becomes 10000.
///
/// If a column has no positive cost at all, or if all its positive costs are
/// identical, no meaningful range exists; the column is then set to 0 instead
/// of dividing by a zero (or bogus) range.
///
/// \param f_row_i     Image row whose DSI slice is updated.
/// \param f_graphImg  Node image of the dynamic programming run for that row;
///                    scanline j is read for column j and is expected to hold
///                    at least dispRange_ui nodes.
void
CKmStereoOp::updateOptDsi ( const int f_row_i, 
                            const CDynamicProgrammingOp::CNodeImage & f_graphImg)
{
    CDsiOpFloat_t::SDispSpaceImage dsi = m_optDsi.getDisparitySpaceImage( );

    const int width_i     = (int)dsi.width_ui;
    const int dispRange_i = (int)dsi.dispRange_ui;
   
    for ( int j = 0 ; j < width_i; ++j  )
    {
        CDynamicProgrammingOp::Node * node_p = f_graphImg.getScanline(j);
        float * const colCost_p = dsi.getPtrAtRowColumnDisp ( f_row_i, j, dsi.minDisp_i );

        /// Range of the strictly positive costs of this column.
        bool  havePositive_b = false;
        float min_f          = 0.f;
        float max_f          = 0.f;
        
        for ( int d = 0; d < dispRange_i; ++d )
        {
            const float cost_f = node_p[d].m_accCost_f;
            colCost_p[d] = cost_f;

            if ( cost_f > 0 )
            {
                if ( !havePositive_b || cost_f < min_f ) min_f = cost_f;
                if ( !havePositive_b || cost_f > max_f ) max_f = cost_f;
                havePositive_b = true;
            }
        }

        const float range_f = max_f - min_f;

        if ( havePositive_b && range_f > 0 )
        {
            for ( int d = 0; d < dispRange_i; ++d )
                colCost_p[d] = 10000.f * (colCost_p[d] - min_f)/range_f;
        }
        else
        {
            /// Degenerate column: nothing to normalize.
            for ( int d = 0; d < dispRange_i; ++d )
                colCost_p[d] = 0.f;
        }
    }
}

/// Refines a disparity map in place.
///
/// Up to three stages are executed, each controlled by a member flag:
///  1. m_preApplyMedianFilter_b:  median filter (3x3 or 5x5) before refinement.
///  2. m_checkMinDsi_b:           sub-pixel refinement / validation of every
///                                disparity against the cost values of the DSI.
///                                Disparities that cannot be confirmed are
///                                set to -1.
///  3. m_postApplyMedianFilter_b: median filter (3x3 or 5x5) after refinement.
///
/// The stages ping-pong between fr_dispImg and m_auxDispMap; if the final
/// result ends up in m_auxDispMap it is copied back into fr_dispImg.
///
/// \param fr_dispImg  Disparity image to refine (input and output).
/// \param f_dsi       Disparity space image the disparities were computed from.
void
CKmStereoOp::refineDispMap ( CFloatImage         &fr_dispImg,
                             const CDsiOpFloat_t &f_dsi )
{
    /// Ping-pong buffers: index 0 is the caller's image, index 1 the auxiliary
    /// map. idx_i always designates the buffer holding the current result.
    CFloatImage * buffImg_p[2] = { &fr_dispImg, &m_auxDispMap };
    int idx_i = 0;
    
    CDsiOpFloat_t::SDispSpaceImage dsi    = f_dsi.getDisparitySpaceImage( );
    CDsiOpFloat_t::SDispSpaceImage optDsi = m_optDsi.getDisparitySpaceImage( );    

    if ( m_preApplyMedianFilter_b )
    {
        /// idx_i is 0 here, so the filter reads buffer 0 and writes buffer 1.
        if (m_medianFilterMode_e == MFM_3x3 )
            CMedianFilter<float,float>::compute3x3(  *buffImg_p[idx_i],   
                                                     *buffImg_p[(idx_i+1)] );
        else
            CMedianFilter<float,float>::compute5x5(  *buffImg_p[idx_i],   
                                                     *buffImg_p[(idx_i+1)] );

        /// Note: the index printed is the source buffer (idx_i has not been
        /// incremented yet); the filtered result is in buffer idx_i+1.
        printf("Pre applying MF result into %i\n", idx_i);

        ++idx_i; 
    }
    
    //m_auxDispMap.copyImageContent ( fr_dispImg );
    
    int w_i = fr_dispImg.getWidth();
    int h_i = fr_dispImg.getHeight();
    
    /// Local constant (despite the member-like name): always also try the
    /// second cost space when the first one does not confirm the disparity.
    bool m_checkMinOtherSpace_b = true;

    /// Check for min in DSI?
    if (m_checkMinDsi_b)
    {
        for (int i = 0; i < h_i; ++i)
        {
            float * disp_p = buffImg_p[idx_i]->getScanline(i);
            
            for ( int j = 0; j < w_i; ++j, ++disp_p )
            {
                /// Only disparities in [0, maxDisp) are candidates; everything
                /// else is marked invalid below.
                if (*disp_p >= 0 && *disp_p < dsi.maxDisp_i )
                {
                    float *  cost_p;

                    /// Cost spaces in the order they are tried: by default the
                    /// given DSI first, then the optimized DSI; the order is
                    /// swapped when m_useOptDsi_b is set.
                    const CDsiOpFloat_t::SDispSpaceImage * dsi_p[2] = {&dsi, &optDsi};
                    int numS_i = 1;

                    if (m_useOptDsi_b) std::swap( dsi_p[0], dsi_p[1] );
                    
                    if ( m_checkMinOtherSpace_b )
                        numS_i = 2;
                    
                    bool dispOk_b = false;
                    
                    /// s: cost space index. The second space is only visited
                    /// if the first one did not accept the disparity.
                    for (int s = 0; s < numS_i && !dispOk_b; ++s)
                    {
                        /// o: disparity offsets tried in the order 0, -1, +1.
                        for (int o = 0; o <= 2 && !dispOk_b; ++o)
                        {
                            int offset_i = o==0?0:(o==1?-1:1);

                            cost_p = dsi_p[s] -> getPtrAtRowColumnDisp ( i, j, *disp_p + offset_i);
                                
                            /// Accept the candidate only if it is a positive
                            /// disparity that stays inside the image and its
                            /// cost is a local minimum along the disparity
                            /// axis with valid (positive) neighbour costs.
                            if (j - (*disp_p + offset_i) >= 0 && (*disp_p + offset_i) > 0 &&
                                cost_p[-1] > 0 && 
                                cost_p[ 0] >= 0 && 
                                cost_p[ 1] > 0 &&
                                cost_p[-1] >= cost_p[0] &&
                                cost_p[ 1] >= cost_p[0] )
                            {
                                /// Vertex of the parabola through the three
                                /// costs: correction = nom / denom.
                                float nom_f = 0.5 * ( cost_p[1] - cost_p[-1] );
                                /// A perfectly symmetric minimum (nom_f == 0)
                                /// is not accepted in this cost space.
                                if ( nom_f != 0.0 )
                                {
                                    float denom_f  = 2.0 * cost_p[0] - ( cost_p[1] + cost_p[-1] );
                                    
                                    float correction_f = nom_f/denom_f;
                                    

                                    /// The sub-pixel shift must stay within
                                    /// half a disparity step.
                                    if (correction_f <= 0.5 && correction_f >= -0.5)
                                    {
                                        *disp_p += offset_i + nom_f / denom_f;
                                        dispOk_b = true;
                                    }
                                }

                                /// Only the first offset that forms a local
                                /// minimum is evaluated per cost space.
                                break;
                            }
                        }
                    }

                    if (!dispOk_b)  *disp_p = -1;
                }
                else
                    *disp_p = -1;
            }
        }
    }

    if ( m_postApplyMedianFilter_b )
    {
        /// Filter from the current buffer into the other one, then switch.
        if (m_medianFilterMode_e == MFM_3x3 )
            CMedianFilter<float,float>::compute3x3(  *buffImg_p[idx_i],   
                                                     *buffImg_p[(idx_i+1)%2] );
        else
            CMedianFilter<float,float>::compute5x5(  *buffImg_p[idx_i],   
                                                     *buffImg_p[(idx_i+1)%2] );
        idx_i++; idx_i%=2;
    }

    /// The final result lives in the auxiliary map: hand it back to the caller.
    if ( idx_i == 1 )
    {
        fr_dispImg.copyImageContentFrom ( m_auxDispMap );
    }
}

/// Converts disparities relative to the predicted disparity into absolute ones.
///
/// Only active when m_useRRI_b is set and the "Predicted Disparity Image"
/// input is available. For every pixel:
///  - prediction > 0 and disparity >= 0: the prediction minus the
///    "Disp Offset for Remapped RRI" input is added to the disparity;
///  - prediction > 0 and disparity < 0:  the disparity is left untouched;
///  - otherwise:                          the disparity is set to -1.
///
/// \param fr_dispImg  Disparity image to post-process (input and output). It
///                    is traversed with the prediction image row by row, so
///                    both are expected to have the same size.
void 
CKmStereoOp::postProcess ( CFloatImage &fr_dispImg )
{
    if ( !m_useRRI_b ) 
        return;
    
    CFloatImage * const predDispImg_p = 
        dynamic_cast<CFloatImage *>(getInput ( "Predicted Disparity Image") );
    const double dispOffset_d = getCastedInputObject<CIO_int, int> ("Disp Offset for Remapped RRI", 0);    

    if ( predDispImg_p == NULL ) 
        return;
    
    const int numCols_i = fr_dispImg.getWidth();
    const int numRows_i = fr_dispImg.getHeight();
    
    for (int row = 0; row < numRows_i; ++row)
    {
        float * const dispRow_p = fr_dispImg.getScanline(row);
        float * const predRow_p = predDispImg_p->getScanline(row);

        for ( int col = 0; col < numCols_i; ++col )
        {
            /// No valid prediction: the pixel is invalidated.
            if ( !(predRow_p[col] > 0) )
            {
                dispRow_p[col] = -1.f;
                continue;
            }

            if ( dispRow_p[col] >= 0 )
                dispRow_p[col] += predRow_p[col] - dispOffset_d;
        }
    }
}

void
CKmStereoOp::remapDsi()
{
    
    CDsiOpFloat_t::SDispSpaceImage dsi    = m_dsi.getDisparitySpaceImage( );    
    CDsiOpFloat_t::SDispSpaceImage dstDsi = m_rightDsi.getDisparitySpaceImage( );
    
#if defined ( _OPENMP )
    const unsigned int numThreads_ui = omp_get_max_threads();
#pragma omp parallel for num_threads(numThreads_ui) schedule(dynamic)
#endif
    for (int i = 0; i < (int)dsi.height_ui; ++i)
    {
        for (int j = 0; j <  (int)dsi.width_ui; ++j)
        {
            for (int d = dsi.minDisp_i; d <= dsi.maxDisp_i; ++d)
            {
                if (j + d >= 0 && 
                    j + d < (int)dsi.width_ui )
                {
                    *dstDsi.getPtrAtRowColumnDisp (i, j, d) = 
                        *dsi.getPtrAtRowColumnDisp (i, j+d, d);
                }
            }            
        }
    }
}

/// Left-right / right-left consistency check.
///
/// For every pixel (i, j) of the left disparity image the matching column in
/// the right disparity image, j - d, is computed. If that column lies inside
/// [0, width-1), the right disparity is linearly interpolated at that
/// position and compared with the left one; the left disparity is set to -1
/// when both differ by more than m_maxDispDiff_f. Pixels whose match falls
/// outside of that range cannot be checked and are left unchanged.
///
/// The range test is done on the floating point column before it is converted
/// to an integer, so that positions in (-1, 0) are no longer accepted (they
/// truncate to column 0 and would be interpolated with a negative weight) and
/// no out-of-range value is ever converted to int.
///
/// \param fr_lDispImg  Left disparity image (input and output).
/// \param f_rDispImg   Right disparity image, read row by row together with
///                     the left image and expected to have the same size.
void 
CKmStereoOp::doLR_RL_Check( CFloatImage       &fr_lDispImg,
                            const CFloatImage &f_rDispImg )
{
    const int w_i = fr_lDispImg.getWidth();
    const int h_i = fr_lDispImg.getHeight();
    
    for (int i = 0; i < h_i; ++i)
    {
        float * l_p = fr_lDispImg.getScanline(i);
        float * r_p = f_rDispImg.getScanline(i);

        for ( int j = 0; j < w_i; ++j, ++l_p )
        {
            /// Sub-pixel column of the match in the right disparity image.
            const float j_f = j - *l_p;

            /// Written as a negated conjunction so that NaN is skipped too.
            if ( !(j_f >= 0.f && j_f < w_i-1) )
                continue;

            const int   j_i     = (int) j_f;
            const float alpha_f = (j_f-j_i);
            const float disp_f  = r_p[j_i] * (1.f-alpha_f) + r_p[j_i+1] * alpha_f;

            if ( fabs (disp_f - *l_p) > m_maxDispDiff_f )
                *l_p = -1;
        }
    }
}


