//////////////////////////////////////////////////////////////////
//         Dense stereo using Semi-Global Matching
//
//  Implementation of Heiko Hirschmuller's algorithm described in
//  "Accurate and Efficient Stereo Processing by Semi-Global Matching
//  and Mutual Information"
//
//  Author: Ilya Rosenberg, NYU Media Research Lab 12/15/2005
//
//


#include <stdio.h>
#include <stdlib.h>
//#include <malloc.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <time.h>

#include "SGM.h"
#include "SmartArray.h"

// Function-like helpers. Being macros, they evaluate their arguments more
// than once, so do not pass expressions with side effects.
#define max(a,b) ( (a) > (b) ? (a) : (b) )
#define min(a,b) ( (a) < (b) ? (a) : (b) )
#define absint(a) ( (a) < 0 ? -(a) : (a) )

// Per-direction x/y unit offsets for the eight neighbours of a pixel.
// Entry d of dir_dx/dir_dy is used together (see calcColorPenalty).
// NOTE(review): with y growing downward the order reads N, NE, E, SE, S, SW,
// W, NW -- confirm against the direction enumerators in SGM.h.
int dir_dx[] = { 0,  1, 1, 1, 0, -1, -1, -1};
int dir_dy[] = {-1, -1, 0, 1, 1,  1,  0, -1};

// These constants need to be cleaned up
// Especially SKIP_LEFT and SKIP_RIGHT which I usually just leave as 0
#define SKIP_LEFT 0 // How many pixels on the left image not to operate over
#define SKIP_RIGHT 0 // How many pixels on the right image not to operate over
// Sum of three squared differences of 255 each; this is the largest index
// colorDif can produce for the square-root table when channels span 0..255.
#define MAX_DIF ((255*255)*3)

// Inclusive pixel bounds of the region the smoothing paths run over.
const int x_min = 0 + SKIP_LEFT;
const int x_max = WIDTH-SKIP_RIGHT-1;
const int y_min = 0;
const int y_max = HEIGHT-1;
// Number of columns in [x_min, x_max].
#define REAL_WIDTH (x_max - x_min + 1)


// Saturate a value to the 8-bit channel range [0, 255].
inline int clamp(int c)
{
    return (c < 0) ? 0 : ((c > 255) ? 255 : c);
}

// Saturate c to [min, max]. The lower bound is tested first, so when
// min > max any value below min still maps to min.
inline int clamp(int c, int min, int max)
{
    if(c < min) c = min;
    else if(c > max) c = max;
    return c;
}


// Look up the image buffer belonging to one side of the stereo pair.
// Only Left and Right have an image; every other side yields NULL.
RGB (*SGM::get_img(Side side))[WIDTH]
{
    switch(side)
    {
        case Left:  return m_leftImg_p;
        case Right: return m_rightImg_p;
        default:    return NULL;
    }
}

// Point the Left or Right image slot at a caller-supplied buffer.
// Any other side is rejected with a message and nothing is changed.
// The previous pointer is simply overwritten, never freed here.
void SGM::set_img(Side side, RGB (*img)[WIDTH])
{
    switch(side)
    {
        case Left:
            m_leftImg_p = img;
            break;

        case Right:
            m_rightImg_p = img;
            break;

        default:
            printf("Error in set_img: Invalid side\n");
            break;
    }
}

// Look up the result buffer for Left, Center or Right.
// Any other side yields NULL.
ResultPixel (*SGM::get_result_arr(Side side))[WIDTH]
{
    switch(side)
    {
        case Left:   return m_leftResult_p;
        case Center: return m_centerResult_p;
        case Right:  return m_rightResult_p;
        default:     return NULL;
    }
}

// Channel-wise mean of two pixels (division by 2, as in the operand type).
// Without SGM_USE_COLOR only the r channel is written; the other fields
// of the returned pixel are left as default-constructed by RGB.
inline RGB pixAvg(RGB& a, RGB& b)
{
    RGB mean;

    mean.r = (a.r + b.r) / 2;
#ifdef SGM_USE_COLOR
    mean.g = (a.g + b.g) / 2;
    mean.b = (a.b + b.b) / 2;
#endif

    return mean;
}

// Dissimilarity between two pixels.
//
//  - With SGM_USE_COLOR: the Euclidean distance over r, g and b, truncated
//    to an integer through the precomputed square-root table.
//  - Otherwise: the squared difference of the r channel alone.
//
// The result is returned as errt.
inline errt SGM::colorDif(RGB& a, RGB& b)
{
#ifdef SGM_USE_COLOR
    const int dr = a.r - b.r;
    const int dg = a.g - b.g;
    const int db = a.b - b.b;

    int tot = dr*dr + dg*dg + db*db;

    // A pure black vs. pure white pair gives exactly MAX_DIF, which is one
    // past the last index of a table holding MAX_DIF entries. Clamp the
    // index so the lookup is always in range. The returned value is not
    // affected: the truncated square roots of MAX_DIF and MAX_DIF-1 are
    // both 441.
    if(tot >= MAX_DIF) tot = MAX_DIF - 1;

    return m_sqrtTable_p[tot];
#else
    const int d = a.r - b.r;

    // A square is never negative, so no abs() is needed.
    const errt tot = d*d;

    return tot;
#endif
}



// Start with no buffers at all; initDataStructs() performs the allocation.
// Every pointer is cleared so the destructor is safe on an object that was
// never initialised.
SGM::SGM()
{
    // Input images (not owned until initDataStructs allocates them)
    m_imgsLocallyAllocated_b = false;
    m_leftImg_p = m_rightImg_p = NULL;

    // Working storage
    m_sqrtTable_p = NULL;
    m_diffErrVol_p = NULL;
    color_penalty = NULL;

    // Output buffers
    m_leftResult_p = m_centerResult_p = m_rightResult_p = m_tempResult_p = NULL;
}

// Release every buffer this object allocated. delete[] on a NULL pointer
// is a no-op, so an uninitialised object is handled too.
SGM::~SGM()
{
    // The image buffers are freed only if initDataStructs allocated them;
    // otherwise they are left alone.
    if(m_imgsLocallyAllocated_b)
    {
        delete[] m_leftImg_p;
        delete[] m_rightImg_p;
    }

    // Output buffers
    delete[] m_tempResult_p;
    delete[] m_rightResult_p;
    delete[] m_centerResult_p;
    delete[] m_leftResult_p;

    // Working storage
    delete[] color_penalty;
    delete[] m_diffErrVol_p;
    delete[] m_sqrtTable_p;
}


// Allocate all working and output buffers and build the square-root table.
//
// allocate_imgs: when true, the left/right image buffers are allocated here
//                and flagged so the destructor frees them.
//
// NOTE(review): existing pointers are overwritten without being freed, so
// this looks intended to be called once per object -- confirm with callers.
void SGM::initDataStructs(bool allocate_imgs)
{
    // colorDif indexes this table with a sum of squared channel differences
    // that can reach MAX_DIF itself, so MAX_DIF+1 entries (0..MAX_DIF) are
    // required. Allocating only MAX_DIF left the top index out of bounds.
    m_sqrtTable_p = new int[MAX_DIF + 1];

    if(allocate_imgs)
    {
        m_imgsLocallyAllocated_b = true;
        m_leftImg_p = new RGB[HEIGHT][WIDTH];
        m_rightImg_p = new RGB[HEIGHT][WIDTH];
    }

    // Per pixel: slice [0] holds matching costs, slice [1] the accumulated
    // path costs (see calcDifferences / semiGlobalSmoothing).
    m_diffErrVol_p = new errt[HEIGHT][WIDTH][2][DEPTH];

    // One penalty map per entry of dir_dx/dir_dy.
    color_penalty = new errt[8][HEIGHT][WIDTH];

    m_leftResult_p = new ResultPixel[HEIGHT][WIDTH];
    m_centerResult_p = new ResultPixel[HEIGHT][WIDTH];
    m_rightResult_p = new ResultPixel[HEIGHT][WIDTH];
    m_tempResult_p = new ResultPixel[HEIGHT][WIDTH];

    // Truncated integer square roots for every possible index, inclusive
    // of MAX_DIF.
    for(int i = 0; i <= MAX_DIF; i++)
    {
        m_sqrtTable_p[i] = int(sqrtf(float(i)));
    }
}


// Fill m_todos with the start point and direction of every path that
// semiGlobalSmoothing will later walk.
//
// sixteen_dirs: when true, also queue start points for the eight
//               additional directions (SSE, NNE, SSW, NNW, SEE, SWW, NEE,
//               NWW) on top of the eight basic ones.
// optimize:     selects between two orderings of the eight basic
//               directions. Per the original note, both are meant to yield
//               the same result checksum.
//
// Entries are appended; nothing in this function clears m_todos first.
void SGM::initTodos(bool sixteen_dirs, bool optimize)
{
    // Mark all start points
    // IDEA: Can improve accuracy by not updating corners from the side
    // at which we start.
    if(optimize)
    {
        // NOTE: This version optimizes passes so we go back and
        // forward for each path. Checksum should match with that
        // of less efficient version.

        // Vertical paths: one start per column on the top and bottom rows.
        for(int x = x_min; x <= x_max; x++)
        {
            m_todos.pushAutoSize(Todo(x, y_min, S)); // S
            m_todos.pushAutoSize(Todo(x, y_max, N)); // N
        }
	
        // Horizontal paths: one start per row on the left and right columns.
        for(int y = y_min; y <= y_max; y++)
        {
            m_todos.pushAutoSize(Todo(x_min, y, E)); // E
            m_todos.pushAutoSize(Todo(x_max, y, W)); // W
        }

        // SE/NW diagonals, queued in three groups (left, middle, right).
        // NOTE(review): the start coordinates appear to assume
        // REAL_WIDTH >= HEIGHT; for a narrower region x_min+i can pass
        // x_max -- confirm.
        for(int i = 0; i < HEIGHT-1; i++)
        {
            m_todos.pushAutoSize(Todo(x_min, y_max-i, SE)); // SE Left Side
            m_todos.pushAutoSize(Todo(x_min+i, y_max, NW)); // NW Left Side
        }
        for(int i = 0; i <= REAL_WIDTH-HEIGHT; i++)
        {
            m_todos.pushAutoSize(Todo(x_min+i, y_min, SE)); // SE Middle
            m_todos.pushAutoSize(Todo(x_min+HEIGHT-1+i, y_max, NW)); // NW Middle
        }
        for(int i = 1; i < HEIGHT; i++)
        {
            m_todos.pushAutoSize(Todo(x_min+REAL_WIDTH-HEIGHT+i, y_min, SE)); // SE Right
            m_todos.pushAutoSize(Todo(x_max, y_max-i, NW)); // NW Right
        }

        // SW/NE diagonals: the mirror image of the block above, walking
        // from the right edge.
        for(int i = 0; i < HEIGHT-1; i++)
        {
            m_todos.pushAutoSize(Todo(x_max, y_max-i, SW)); // SW Right
            m_todos.pushAutoSize(Todo(x_max-i, y_max, NE)); // NE Right
        }
        for(int i = 0; i <= REAL_WIDTH-HEIGHT; i++)
        {
            m_todos.pushAutoSize(Todo(x_max-i, y_min, SW)); // SW Middle
            m_todos.pushAutoSize(Todo(x_max-(HEIGHT-1+i), y_max, NE)); // NE Middle
        }
        for(int i = 1; i < HEIGHT; i++)
        {
            m_todos.pushAutoSize(Todo(x_max-(REAL_WIDTH-HEIGHT+i), y_min, SW)); // SW Left
            m_todos.pushAutoSize(Todo(x_min, y_max-i, NE)); // NE Left
        }
    }
    else
    {
        // Straightforward ordering: every top/bottom-row pixel starts the
        // vertical and diagonal paths that enter through that row.
        for(int x = x_min; x <= x_max; x++)
        {
            m_todos.pushAutoSize(Todo(x, y_min, S)); // S
            m_todos.pushAutoSize(Todo(x, y_max, N)); // N

            m_todos.pushAutoSize(Todo(x, y_min, SE)); // SE
            m_todos.pushAutoSize(Todo(x, y_min, SW)); // SW
            m_todos.pushAutoSize(Todo(x, y_max, NE)); // NE
            m_todos.pushAutoSize(Todo(x, y_max, NW)); // NW
        }

        // Left/right-column pixels start the horizontal paths and the
        // remaining diagonals. The y != y_min / y != y_max tests skip the
        // corner starts already queued by the loop above.
        for(int y = y_min; y <= y_max; y++)
        {
            m_todos.pushAutoSize(Todo(x_min, y, E)); // E
            m_todos.pushAutoSize(Todo(x_max, y, W)); // W

            if(y != y_min) m_todos.pushAutoSize(Todo(x_min, y, SE)); // SE
            if(y != y_max) m_todos.pushAutoSize(Todo(x_min, y, NE)); // NE
            if(y != y_min) m_todos.pushAutoSize(Todo(x_max, y, SW)); // SW
            if(y != y_max) m_todos.pushAutoSize(Todo(x_max, y, NW)); // NW
        }
    }

    // 16 Directions (Unoptimized for cache behavior)
    // Each direction gets starts on two edges: every second pixel along one
    // edge and every pixel (minus one corner) along the other.
    if(sixteen_dirs)
    {
        //SSE
        for(int y = y_min; y <= y_max; y+=2) m_todos.pushAutoSize(Todo(x_min, y, SSE)); 
        for(int x = x_min+1; x <= x_max; x++) m_todos.pushAutoSize(Todo(x, y_min, SSE));

        //NNE
        for(int y = y_max; y >= y_min; y-=2) m_todos.pushAutoSize(Todo(x_min, y, NNE));
        for(int x = x_min+1; x <= x_max; x++) m_todos.pushAutoSize(Todo(x, y_max, NNE));

        //SSW
        for(int y = y_min; y <= y_max; y+=2) m_todos.pushAutoSize(Todo(x_max, y, SSW)); 
        for(int x = x_min; x <= x_max-1; x++) m_todos.pushAutoSize(Todo(x, y_min, SSW));

        //NNW
        for(int y = y_max; y >= y_min; y-=2) m_todos.pushAutoSize(Todo(x_max, y, NNW));
        for(int x = x_min; x <= x_max-1; x++) m_todos.pushAutoSize(Todo(x, y_max, NNW));

        //SEE
        for(int x = x_min; x <= x_max; x+=2) m_todos.pushAutoSize(Todo(x, y_min, SEE));
        for(int y = y_min+1; y <= y_max; y++) m_todos.pushAutoSize(Todo(x_min, y, SEE));

        //SWW
        for(int x = x_max; x >= x_min; x-=2) m_todos.pushAutoSize(Todo(x, y_min, SWW));
        for(int y = y_min+1; y <= y_max; y++) m_todos.pushAutoSize(Todo(x_max, y, SWW));

        //NEE
        for(int x = x_min; x <= x_max; x+=2) m_todos.pushAutoSize(Todo(x, y_max, NEE));
        for(int y = y_min; y <= y_max-1; y++) m_todos.pushAutoSize(Todo(x_min, y, NEE));

        //NWW
        for(int x = x_max; x >= x_min; x-=2) m_todos.pushAutoSize(Todo(x, y_max, NWW));
        for(int y = y_min; y <= y_max-1; y++) m_todos.pushAutoSize(Todo(x_max, y, NWW));
    }

}

/*
  bool loadImages(int img_no)
  {
  char left_img_name[100];
  char right_img_name[100];

  #ifdef FULL_SIZE
  sprintf(left_img_name, "Input\\lagr-stereo-%04d-l.png", img_no);
  sprintf(right_img_name, "Input\\lagr-stereo-%04d-r.png", img_no);
  #else
  sprintf(left_img_name, "Input\\half\\lagr-stereo-%04d-hl.png", img_no);
  sprintf(right_img_name, "Input\\half\\lagr-stereo-%04d-hr.png", img_no);
  #endif

  left_img = new Image();
  right_img = new Image();

  if(!left_img->readFile(left_img_name))
  {
  printf("Error: no left image at %s\n", left_img_name);
  return false;
  }
  if(left_img->width != WIDTH) printf("Error: left test image width != %d\n", WIDTH);
  if(left_img->height != HEIGHT) printf("Error: left test image height != %d\n", HEIGHT);

  if(!right_img->readFile(right_img_name))
  {
  printf("Error: no right image at %s\n", right_img_name);
  return false;
  }
  if(right_img->width != WIDTH) printf("Error: right test image width != %d\n", WIDTH);
  if(right_img->height != HEIGHT) printf("Error: right test image height != %d\n", HEIGHT);

  return true;
  }
*/

// Scale one channel value by to_mean/from_mean and saturate it to [0, 255].
// A zero from_mean leaves the value untouched instead of dividing by zero.
// The range test is done on the float so that an out-of-range quotient is
// never converted to int.
static inline int scaleChannelSaturated(int value, float to_mean, float from_mean)
{
    if(from_mean == 0.0f) return value;

    const float scaled = value * to_mean / from_mean;
    if(!(scaled > 0.0f)) return 0;      // also catches NaN
    if(scaled >= 255.0f) return 255;
    return int(scaled);
}

// Match the average brightness (and, with SGM_USE_COLOR, the average of
// each colour channel) of one image to the other.
//
// side: the image that gets rewritten in place. Left scales the left image
//       toward the right image's channel means; Right does the opposite.
//       Any other value prints an error and changes nothing.
//
// Both images are read in full to obtain the means.
void SGM::colorCorrect(Side side)
{
    // Reject bad input before scanning both images.
    if(side != Left && side != Right)
    {
        printf("Error in colorCorrect: Invalid side\n");
        return;
    }

    // Sums are kept in double: a float accumulator stops representing
    // integers exactly above 2^24, which a VGA-sized 8-bit image exceeds.
    double sum_rl = 0.0, sum_rr = 0.0;
#ifdef SGM_USE_COLOR
    double sum_gl = 0.0, sum_bl = 0.0;
    double sum_gr = 0.0, sum_br = 0.0;
#endif

    for(int y = 0; y < HEIGHT; y++)
    {
        for(int x = 0; x < WIDTH; x++)
        {
            RGB& lpix = m_leftImg_p[y][x];
            RGB& rpix = m_rightImg_p[y][x];

            sum_rl += lpix.r;
            sum_rr += rpix.r;
#ifdef SGM_USE_COLOR
            sum_gl += lpix.g;
            sum_bl += lpix.b;
            sum_gr += rpix.g;
            sum_br += rpix.b;
#endif
        }
    }

    const double n_pixels = double(WIDTH) * double(HEIGHT);

    const float rl = float(sum_rl / n_pixels);
    const float rr = float(sum_rr / n_pixels);
#ifdef SGM_USE_COLOR
    const float gl = float(sum_gl / n_pixels);
    const float bl = float(sum_bl / n_pixels);
    const float gr = float(sum_gr / n_pixels);
    const float br = float(sum_br / n_pixels);
#endif

    // printf("Left Image Avg Color = [%f, %f, %f]\n", rl, gl, bl);
    // printf("Right Image Avg Color = [%f, %f, %f]\n", rr, gr, br);

    // "own" is the mean of the image being rewritten, "other" the mean it
    // is scaled toward.
    const bool fix_left = (side == Left);
    RGB (*img)[WIDTH] = fix_left ? m_leftImg_p : m_rightImg_p;

    const float r_own = fix_left ? rl : rr;
    const float r_other = fix_left ? rr : rl;
#ifdef SGM_USE_COLOR
    const float g_own = fix_left ? gl : gr;
    const float g_other = fix_left ? gr : gl;
    const float b_own = fix_left ? bl : br;
    const float b_other = fix_left ? br : bl;
#endif

    for(int y = 0; y < HEIGHT; y++)
    {
        for(int x = 0; x < WIDTH; x++)
        {
            RGB& pix = img[y][x];
            pix.r = scaleChannelSaturated(pix.r, r_other, r_own);
#ifdef SGM_USE_COLOR
            pix.g = scaleChannelSaturated(pix.g, g_other, g_own);
            pix.b = scaleChannelSaturated(pix.b, b_other, b_own);
#endif
        }
    }
    //left_img->writeFile("Corrected Left.png");
    //right_img->writeFile("Corrected Right.png");
}

// Fill the matching-cost slice of the error volume.
//
// For every pixel (x, y) and every disparity dx in [MIN_DISP, MAX_DISP]
// this stores min(cutoff, cost) in m_diffErrVol_p[y][x][0][dx-MIN_DISP]
// and resets the accumulator slice m_diffErrVol_p[y][x][1][dx-MIN_DISP]
// to 0.
//
// side:        which view is the baseline.
//                Left   - left pixel x is compared with right pixel x+dx.
//                Right  - right pixel x is compared with left pixel x-dx.
//                Center - both images are doubled in width and compared at
//                         x*2-dx (left) and x*2+dx (right).
//              Any other value leaves the volume untouched.
// default_err: cost used by Regular and Tomasi when the compared position
//              falls outside the image.
// cutoff:      upper bound applied to every stored cost.
// cost_style:  Regular - plain colorDif of the two pixels.
//              Clamp   - out-of-range positions are clamped to the image
//                        edge instead of using default_err.
//              Tomasi  - minimum of colorDif against the pixel and
//                        against its half-pixel neighbours.
//              An unknown style prints a message for every pixel/disparity
//              and stores min(cutoff, default_err).
void SGM::calcDifferences(Side side, errt default_err, errt cutoff, CostStyle cost_style)
{
    // Calculate differences
    if(side == Center)
    {
        // Upsample the images
        // This doubles the resolution of both images, and
        // shifts the upsampled left image right by a pixel while shifting
        // the upsampled right image left by a pixel.
        // This adds a +1/2 disparity to everything. Also, it has the effect
        // of making every comparison a comparison between an actual pixel
        // and an averaged pixel.
        RGB (*l_up_img)[WIDTH*2] = new RGB[HEIGHT][WIDTH*2];
        RGB (*r_up_img)[WIDTH*2] = new RGB[HEIGHT][WIDTH*2];
        for(int y = 0; y < HEIGHT; y++)
        {
            for(int x = 0; x < WIDTH; x++)
            {
                // Left: original pixels land on odd columns, averages with
                // the previous pixel on even columns (edge pixel repeated).
                l_up_img[y][(x*2)+1] = m_leftImg_p[y][x];
                l_up_img[y][x*2] = pixAvg(m_leftImg_p[y][max(x-1,0)], m_leftImg_p[y][x]);
				
                // Right: original pixels land on even columns, averages
                // with the next pixel on odd columns (edge pixel repeated).
                r_up_img[y][x*2] = m_rightImg_p[y][x];
                r_up_img[y][(x*2)+1] = pixAvg(m_rightImg_p[y][x], m_rightImg_p[y][min(x+1,WIDTH-1)]);
            }
        }
        //left_up_img->writeFile("left_up_img.png");
        //right_up_img->writeFile("right_up_img.png");

        for(int y = 0; y < HEIGHT; y++)
        {
            for(int x = 0; x < WIDTH; x++)
            {
                for(int dx = MIN_DISP; dx <= MAX_DISP; dx ++)
                {
                    // Positions in the double-width images.
                    int leftx = x*2-dx;
                    int rightx = x*2+dx;
                    errt error = default_err;

                    if(cost_style == Regular)
                    {
                        if(leftx >= 0 && leftx < WIDTH*2 && rightx >= 0 && rightx < WIDTH*2)
                        {
                            error = errt(colorDif(l_up_img[y][leftx], r_up_img[y][rightx]));
                        }
                    }
                    else if(cost_style == Clamp)
                    {
                        error = errt(colorDif(l_up_img[y][clamp(leftx,0,WIDTH*2-1)], r_up_img[y][clamp(rightx,0,WIDTH*2-1)]));
                    }
                    else if(cost_style == Tomasi)
                    {
                        if(leftx >= 0 && leftx < WIDTH*2 && rightx >= 0 && rightx < WIDTH*2)
                        {
                            error = ERRT_MAX;

                            // Best of the left sample and its two neighbours
                            // in the upsampled image against the right sample.
                            if(leftx >= 1) error = min(error, errt(colorDif(l_up_img[y][leftx-1], r_up_img[y][rightx])));
                            error = min(error, errt(colorDif(l_up_img[y][leftx], r_up_img[y][rightx])));
                            if(leftx < WIDTH*2-1) error = min(error, errt(colorDif(l_up_img[y][leftx+1], r_up_img[y][rightx])));

                            //if(rightx >= 1) error = min(error, errt(colorDif(l_up_img[y][leftx], r_up_img[y][rightx-1])));
                            //error = min(error, errt(colorDif(l_up_img[y][leftx], r_up_img[y][rightx])));
                            //if(rightx < WIDTH*2-1) error = min(error, errt(colorDif(l_up_img[y][leftx], r_up_img[y][rightx+1])));
                        }
                    }
                    else
                    {
                        printf("Error: unknown cost style\n");
                    }

                    m_diffErrVol_p[y][x][0][dx-MIN_DISP] = min(cutoff, error);;
                    m_diffErrVol_p[y][x][1][dx-MIN_DISP] = 0;
                }
            }
            //printf("Done with test disparity %d\n", dx);
        }

        // The upsampled copies are only needed while building the costs.
        delete[] l_up_img;
        delete[] r_up_img;

    }
    else if(side == Left)
    {
        // This is a left to right difference calc (Left image is baseline, right image is match)
        for(int y = 0; y < HEIGHT; y++)
        {
            for(int x = 0; x < WIDTH; x++)
            {
                for(int dx = MIN_DISP; dx <= MAX_DISP; dx ++)
                {
                    int leftx = x;
                    int rightx = x+dx;
                    errt error = default_err;

                    if(cost_style == Regular)
                    {
                        // leftx == x is always inside the image, so only
                        // rightx needs checking.
                        if(/*leftx >= 0 && leftx < WIDTH &&*/ rightx >= 0 && rightx < WIDTH)
                        {
                            error = errt(colorDif(m_leftImg_p[y][leftx], m_rightImg_p[y][rightx]));
                        }
                    }
                    else if(cost_style == Clamp)
                    {
                        error = errt(colorDif(m_leftImg_p[y][leftx], m_rightImg_p[y][clamp(rightx,0,WIDTH-1)]));
                    }
                    else if(cost_style == Tomasi)
                    {
                        if(/*leftx >= 0 && leftx < WIDTH &&*/ rightx >= 0 && rightx < WIDTH)
                        {
                            error = ERRT_MAX;
                            // Compare the left pixel with the right pixel and
                            // with the right pixel averaged with each of its
                            // horizontal neighbours; keep the smallest cost.
                            if(rightx >= 1)
                            {
                                RGB left_avg = pixAvg(m_rightImg_p[y][rightx], m_rightImg_p[y][rightx-1]);
                                error = min(error, colorDif(m_leftImg_p[y][leftx], left_avg));
                            }
                            error = min(error, colorDif(m_leftImg_p[y][leftx], m_rightImg_p[y][rightx]));
                            if(rightx < WIDTH-1)
                            {
                                RGB right_avg = pixAvg(m_rightImg_p[y][rightx], m_rightImg_p[y][rightx+1]);
                                error = min(error, colorDif(m_leftImg_p[y][leftx], right_avg));
                            }
                        }
                    }
                    else
                    {
                        printf("Unknown cost_style\n");
                    }

                    m_diffErrVol_p[y][x][0][dx-MIN_DISP] = min(cutoff, error);
                    m_diffErrVol_p[y][x][1][dx-MIN_DISP] = 0;
                }
            }
            //printf("Done with disparity %d\n", dx);
        }
    }
    else if(side == Right)
    {
        // This is a right to left difference calc (Right image is baseline, Left is match)
        for(int y = 0; y < HEIGHT; y++)
        {
            for(int x = 0; x < WIDTH; x++)
            {
                for(int dx = MIN_DISP; dx <= MAX_DISP; dx ++)
                {
                    int leftx = x-dx;
                    int rightx = x;
                    errt error = default_err;

                    if(cost_style == Regular)
                    {
                        // rightx == x is always inside the image, so only
                        // leftx needs checking.
                        if(leftx >= 0 && leftx < WIDTH /* && rightx >= 0 && rightx < WIDTH */)
                        {
                            error = errt(colorDif(m_leftImg_p[y][leftx], m_rightImg_p[y][rightx]));
                        }
                    }
                    else if(cost_style == Clamp)
                    {
                        error = errt(colorDif(m_leftImg_p[y][clamp(leftx, 0, WIDTH-1)], m_rightImg_p[y][rightx]));
                    }
                    else if(cost_style == Tomasi)
                    {
                        if(leftx >= 0 && leftx < WIDTH /* && rightx >= 0 && rightx < WIDTH */)
                        {
                            error = ERRT_MAX;
                            // NOTE(review): as in the Left branch, the
                            // half-pixel averages are taken in the right
                            // image, which is the baseline here rather than
                            // the match image -- confirm this is intended.
                            if(rightx >= 1)
                            {
                                RGB left_avg = pixAvg(m_rightImg_p[y][rightx], m_rightImg_p[y][rightx-1]);
                                error = min(error, colorDif(m_leftImg_p[y][leftx], left_avg));
                            }
                            error = min(error, colorDif(m_leftImg_p[y][leftx], m_rightImg_p[y][rightx]));
                            if(rightx < WIDTH-1)
                            {
                                RGB right_avg = pixAvg(m_rightImg_p[y][rightx], m_rightImg_p[y][rightx+1]);
                                error = min(error, colorDif(m_leftImg_p[y][leftx], right_avg));
                            }
                        }
                    }
                    else
                    {
                        printf("Unknown cost_style\n");
                    }

                    m_diffErrVol_p[y][x][0][dx-MIN_DISP] = min(cutoff, error);
                    m_diffErrVol_p[y][x][1][dx-MIN_DISP] = 0;
                }
            }
            //printf("Done with disparity %d\n", dx);
        }

    }
}

// Precompute, for each of the eight dir_dx/dir_dy directions and each
// pixel, the jump penalty used when smoothing with colour awareness.
//
// The penalty for stepping onto (x, y) from its predecessor
// (x-dx, y-dy) is
//     penalty2 + penalty3 / (colorDif(pixel, predecessor) + 1)
// so a strong colour edge lowers the cost of a disparity jump. When the
// predecessor lies outside the image the full penalty2 + penalty3 is used.
//
// side:     image the colour differences are taken from (Left or Right).
// penalty2: constant part of the penalty.
// penalty3: part that is divided by the colour difference.
//
// Results are written to color_penalty[d][y][x]. If no image is available
// for the given side an error is printed and nothing is written.
void SGM::calcColorPenalty(Side side, errt penalty2, errt penalty3)
{
    RGB (*img)[WIDTH] = get_img(side);

    // get_img returns NULL for any side other than Left/Right, and the
    // image pointers themselves start out NULL; indexing would crash.
    if(img == NULL)
    {
        printf("Error in calcColorPenalty: Invalid side or image not set\n");
        return;
    }

    // Penalty for pixels whose predecessor falls outside the image.
    const errt border_penalty = penalty2 + penalty3;

    for(int d = 0; d < 8; d ++)
    {
        const int dx = dir_dx[d];
        const int dy = dir_dy[d];

        for(int y = 0; y < HEIGHT; y ++)
        {
            for(int x = 0; x < WIDTH; x ++)
            {
                // Predecessor of (x, y) along direction d.
                const int x2 = x-dx;
                const int y2 = y-dy;

                errt error = border_penalty;

                if(x2 >= 0 && x2 < WIDTH && y2 >= 0 && y2 < HEIGHT)
                {
                    RGB& pix1 = img[y][x];
                    RGB& pix2 = img[y2][x2];

                    // +1 keeps the divisor non-zero for identical pixels.
                    error = penalty2 + penalty3 / (colorDif(pix1, pix2)+1);
                }

                color_penalty[d][y][x] = error;
            }
        }
    }
}



//////////////////////////////////////////////////
// These functions use bitwise operations to get
// the min of a number. Can be faster than if statements
// when the branch direction is hard to predict, and
// slower when the branch direction is consistent.
#ifndef USE_FLOATS

// Branch-free minimum for an integer errt.
//
// The select mask is all ones when a < b and all zeros otherwise. It is
// derived from the comparison itself rather than from shifting (a-b): the
// previous shift count (sizeof*4-1) did not reach the sign bit, so the mask
// was neither all-ones nor all-zeros and the result was wrong for most
// inputs.
inline errt bitmin(errt a, errt b) {
    const errt pick_a = -errt(a < b);
    return (a & pick_a) | (b & ~pick_a);
}

#else

// Reinterpret the storage of a float as an int and back.
// NOTE(review): pointer-cast type punning assumes sizeof(int) ==
// sizeof(float) and is not sanctioned by the strict-aliasing rules;
// kept because other code may rely on these macros.
#define AS_INT(a) (*((int*)(&(a))))
#define AS_FLT(a) (*((float*)(&(a))))

// Branch-free minimum for a float errt: the bit patterns of a and b are
// blended through an all-ones / all-zeros mask taken from (a < b).
// The previous mask came from shifting the bits of (a-b) by sizeof*4-1,
// which left a partial mask and produced a mixture of both operands.
inline errt bitmin(errt a, errt b) {
    const int pick_a = -int(a < b);
    int picked = (AS_INT(a) & pick_a) | (AS_INT(b) & ~pick_a);
    return AS_FLT(picked);
}

#endif

// Run the path-wise cost aggregation of semi-global matching.
//
// Every entry of m_todos describes one path (start pixel plus step
// pattern). Along a path, the cost of disparity index i at a pixel is the
// pixel's own matching cost plus the cheapest way of arriving from the
// previous pixel:
//   - same disparity, no penalty;
//   - neighbouring disparity (i-1 or i+1), plus penalty1;
//   - with allow_jumps: any disparity, plus penalty2.
// The per-path costs are added into slice [1] of m_diffErrVol_p; slice [0]
// (the matching costs) is only read.
//
// penalty1:          cost of a one-step disparity change.
// penalty2:          cost of an arbitrary disparity jump. Replaced per
//                    pixel by the color_penalty table when
//                    use_color_penalty is set.
// use_color_penalty: look up penalty2 from color_penalty (see
//                    calcColorPenalty) instead of using a constant.
//
// Behaviour also depends on allow_jumps, subtract_common and use_bitmin,
// which are defined outside this function.
void SGM::semiGlobalSmoothing(errt penalty1, errt penalty2, bool use_color_penalty)
{
    const bool show_progress = false;

    // Two cost rows with one guard cell on either side, so that the
    // [i-1] / [i+1] neighbour lookups need no range checks. The guards
    // hold a large value that can never win a minimum.
    errt last_err_data[DEPTH+2];
    errt cur_err_data[DEPTH+2];
    last_err_data[0] = last_err_data[DEPTH+1] = ERRT_MAX - (2 * penalty2);
    cur_err_data[0] = cur_err_data[DEPTH+1] = ERRT_MAX - (2 * penalty2);

    errt* last_err = &(last_err_data[1]);
    errt* cur_err = &(cur_err_data[1]);

    int last_percentage = -1;

    for(int t = 0; t < m_todos.size; t++)
    {
        Todo* todo = &(m_todos[t]);
        int curx = todo->x;
        int cury = todo->y;
        int odd_even = 0;          // alternates between the two step entries
        bool first_pixel = true;   // the path start has no predecessor
        bool in_bounds = true;

        do
        {
            errt* dif_col = &m_diffErrVol_p[cury][curx][0][0];
            errt* err_col = &m_diffErrVol_p[cury][curx][1][0];

            if(first_pixel)
            {
                // Path start: the cost is just the matching cost.
                for(int i = 0; i < DEPTH; i++)
                {
                    last_err[i] = 0;
                    cur_err[i] = dif_col[i];
                }
                first_pixel = false;
            }
            else
            {
                errt min_last_err;
                if(allow_jumps || subtract_common)
                {
                    // Cheapest cost at the previous pixel.
                    min_last_err = last_err[0];
                    for(int j = 1; j < DEPTH; j++)
                    {
                        errt err = last_err[j];
                        if(use_bitmin) {
                            min_last_err = bitmin(err, min_last_err);
                        } else {
                            if(err < min_last_err) min_last_err = err;
                        }
                    }

                    if(!subtract_common)
                    {
                        min_last_err += penalty2;
                    }
                    else
                    {
                        // Rebase the previous row on its minimum so path
                        // costs do not grow without bound; the jump cost
                        // then reduces to penalty2.
                        for(int j = 0; j < DEPTH; j++)
                        {
                            last_err[j] -= min_last_err;
                        }
                        min_last_err = penalty2;
                    }
                }

                for(int i = 0; i < DEPTH; i++)
                {
                    // In an old version of the code, I would look through all the items,
                    // and add an extra penalty based on the distance. This discouraged
                    // big discontinuities. It worked, but was quite expensive.
                    // It could have probably been made more efficent by not looking at the
                    // items that were far away in depth.

                    errt min_err;

                    // Same disparity vs. neighbouring disparity + penalty1.
                    // For i == 0 and i == DEPTH-1 the neighbour read hits a
                    // guard cell.
                    if(use_bitmin)
                    {
                        min_err = bitmin(bitmin(last_err[i-1],last_err[i+1]) + penalty1, last_err[i]);
                    } else {
                        min_err = last_err[i];
                        errt err = last_err[i-1] + penalty1; if(err < min_err) min_err = err;
                        errt err2 = last_err[i+1] + penalty1; if(err2 < min_err) min_err = err2;
                    }

                    if(allow_jumps)
                    {
                        if(use_bitmin)
                        {
                            min_err = bitmin(min_last_err, min_err);
                        } else {
                            if(min_last_err < min_err) min_err = min_last_err;
                        }
                    }

                    cur_err[i] = min_err + dif_col[i];
                }
            }

            // Add this path's costs to the running total.
            for(int i = 0; i < DEPTH; i++) err_col[i] += cur_err[i];

            // The row just computed becomes the "previous" row.
            errt* temp_err = cur_err; cur_err = last_err; last_err = temp_err;

            // Advance along the path.
            curx += todo->stepx[odd_even]; cury += todo->stepy[odd_even]; // Have to adjust for the weird directions.
            in_bounds = (curx >= x_min && curx <= x_max && cury >= y_min && cury <= y_max);

            // Fetch the penalty for the next pixel only if there is one.
            // The lookup used to run before the bounds test and read
            // outside color_penalty on the last step of every path.
            if(use_color_penalty && in_bounds)
            {
                penalty2 = color_penalty[todo->dir[odd_even]/2][cury][curx];
            }
            odd_even = odd_even ^ 1; // same as odd_even = 1-odd_even

            if(show_progress)
            {
                int cur_percentage = int(100.0f * float(t) / float(m_todos.size));
                if(cur_percentage > last_percentage) printf("%d%% done\n", cur_percentage);
                last_percentage = cur_percentage;
            }

        } while (in_bounds);
    }
}

///////////////////////////////////////////////////////////////
// Helper for extractResults: map an x coordinate in the result image to
// the x coordinate of the error-volume column holding that sample.
//
// x:            column in the result image.
// z:            index into the error volume (disparity = z + MIN_DISP).
// source_side:  side the error volume was built for.
// result_side:  side the result is wanted for.
//
// Returns the volume column, or -1 if it falls outside [0, WIDTH) or the
// side combination is not supported.
int imgx2volx(int x, int z, Side source_side, Side result_side)
{
    // Same view: columns coincide, no range check needed.
    if(source_side == result_side) return x;

    const int disp = z + MIN_DISP;
    int shifted;

    if(source_side == Left && result_side == Right)
    {
        shifted = x - disp;
    }
    else if(source_side == Right && result_side == Left)
    {
        shifted = x + disp;
    }
    else if((source_side == Center && result_side == Right) || (source_side == Left && result_side == Center))
    {
        // Half the shift; integer division gives steps like
        // (up, up, right, up, up, right).
        shifted = x - (disp/2);
    }
    else if((source_side == Center && result_side == Left) || (source_side == Right && result_side == Center))
    {
        shifted = x + (disp/2);
    }
    else
    {
        printf("Error in imgx2volx: invalid sides\n");
        return -1;
    }

    return (shifted >= 0 && shifted < WIDTH) ? shifted : -1;
}


// Pick the winning disparity for every pixel from the accumulated costs.
//
// source_side: the side the semi-global smoothing was done on (i.e. the
//              view the error volume is aligned with).
// result_side: the side for which the results should be produced; selects
//              the result array that is written.
//
// For each pixel the disparity index with the lowest accumulated cost
// (slice [1]) is chosen, scanning from the highest index down so that ties
// go to the higher index. A parabola through the winner and its two
// neighbours gives the subpixel correction. Each ResultPixel receives
// depth, error (the raw matching cost of the winner), subpixel_correction,
// subpixel_depth and consistent.
//
// Pixels for which no disparity maps to a valid volume column are marked
// inconsistent and get depth MIN_DISP-1 and an error of ERRT_MAX.
void SGM::extractResults(Side source_side, Side result_side)
{
    ResultPixel (*result_arr)[WIDTH] = get_result_arr(result_side);
    if(result_arr == NULL)
    {
        // get_result_arr returns NULL for unsupported sides, and the
        // arrays are NULL before initDataStructs has run.
        printf("Error in extractResults: Invalid result side\n");
        return;
    }

    // Debug checksums, see the commented-out printf at the end.
    int pixel_sum = 0;
    int pixel_check = 0;

    for(int y = 0; y < HEIGHT; y++)
    {
        for(int x = 0; x < WIDTH; x++)
        {
            errt min_err = ERRT_MAX;
            int min_err_disp = -1;
            int min_err_x = -1;

            for(int z = DEPTH-1; z >= 0; z--)
            {
                int volx = imgx2volx(x, z, source_side, result_side);
                if(volx == -1) continue;

                errt err = m_diffErrVol_p[y][volx][1][z];
                if(err < min_err)
                {
                    min_err = err;
                    min_err_disp = z;
                    min_err_x = volx;
                }
            }

            ResultPixel& result = result_arr[y][x];

            if(min_err_disp < 0)
            {
                // No candidate at all. The indices are still -1 and must
                // not be used to address the volume.
                result.depth = min_err_disp + MIN_DISP;
                result.error = min_err;
                result.subpixel_correction = 0.0f;
                result.subpixel_depth = float(min_err_disp) + MIN_DISP;
                result.consistent = false;
                continue;
            }

            float subpixel_disp = float(min_err_disp);
            float correction = 0.0f;

            // Subpixel refinement needs a neighbour on both sides.
            if(min_err_disp != 0 && min_err_disp != DEPTH-1)
            {
                int volxa = imgx2volx(x, min_err_disp-1, source_side, result_side);
                int volxb = imgx2volx(x, min_err_disp, source_side, result_side);
                int volxc = imgx2volx(x, min_err_disp+1, source_side, result_side);

                if(volxa != -1 && volxb != -1 && volxc != -1)
                {
                    int a = m_diffErrVol_p[y][volxa][1][min_err_disp-1];
                    int b = m_diffErrVol_p[y][volxb][1][min_err_disp];
                    int c = m_diffErrVol_p[y][volxc][1][min_err_disp+1];

                    // Vertex of the parabola through (-1,a), (0,b), (1,c).
                    // A flat triple (a == b == c) has no vertex; skip it
                    // rather than compute 0/0.
                    const int denom = 2*a + 2*c - 4*b;
                    if(denom != 0)
                    {
                        correction = float(a - c) / float(denom);
                    }
                }
            }
            subpixel_disp += correction;

            result.depth = min_err_disp + MIN_DISP;
            result.error = m_diffErrVol_p[y][min_err_x][0][min_err_disp];
            result.subpixel_correction = correction;
            result.subpixel_depth = subpixel_disp + MIN_DISP;
            result.consistent = true;

            pixel_sum += min_err_disp;
            pixel_check = pixel_check ^ min_err_disp;
        }
    }

    // printf("Sum = %d, CheckSum = %d\n", pixel_sum, pixel_check);
}

void SGM::consistencyCheck(int thresh)
{
    for(int y = 0; y < HEIGHT; y++)
    {
        for(int x = 0; x < WIDTH; x++)
        {
            ResultPixel& lresult = m_leftResult_p[y][x];
            int r_x = x + lresult.depth;
            if(r_x < 0 || r_x > WIDTH-1)
            {
                lresult.consistent = false;
            }
            else
            {
                ResultPixel& rresult = m_rightResult_p[y][r_x];
                if(absint(rresult.depth - lresult.depth) > thresh)
                {
                    //rresult.consistent = false;
                    lresult.consistent = false;
                    //if(rresult.depth > lresult.depth)
                    //	lresult = rresult;
                }
            }
        }
    }

    for(int y = 0; y < HEIGHT; y++)
    {
        for(int x = 0; x < WIDTH; x++)
        {
            ResultPixel& rresult = m_rightResult_p[y][x];
            int l_x = x - rresult.depth;
            if(l_x < 0 || l_x > WIDTH-1)
            {
                rresult.consistent = false;
            }
            else
            {
                ResultPixel& lresult = m_leftResult_p[y][l_x];
                if(absint(rresult.depth - lresult.depth) > thresh)
                {
                    //lresult.consistent = false;
                    rresult.consistent = false;
                    //if(lresult.depth > rresult.depth)
                    //	rresult = lresult;
                }
            }
        }
    }
}

// Median-filter the result array of one side.
//
// For every consistent pixel the consistent 8-neighbours are gathered and
// ordered by subpixel_depth:
//   - if the inconsistent neighbours outnumber the consistent ones, the pixel
//     itself is flagged inconsistent;
//   - otherwise, if the integer depth of the median neighbour differs from the
//     pixel's depth by more than `thresh`, the pixel is replaced by a copy of
//     that neighbour;
//   - otherwise the pixel is kept as is.
// Inconsistent pixels are passed through unchanged.
//
// Each pass reads result_arr and writes m_tempResult_p, which is then copied
// back, so a pass never sees its own partial output.
//
//   side   - selects the result array (via get_result_arr).
//   steps  - maximum number of passes.
//   thresh - depth difference above which the median replaces the pixel.
//
// Filtering stops early as soon as a pass changes nothing: a pass is a pure
// function of the result array, so every later pass would be a no-op too.
//
// TODO: Share the neighbour-gathering and sorting code with medianFill.
void SGM::medianFilter(Side side, int steps, int thresh)
{
    ResultPixel (*result_arr)[WIDTH];
    result_arr = get_result_arr(side);

    // Defensive: bail out if no array was returned for this side.
    if(result_arr == NULL)
    {
        printf("Error in medianFilter: Invalid side\n");
        return;
    }

    for(int step = 0; step < steps; step++)
    {
        // Number of pixels whose stored value actually changed in this pass.
        // (Counting every processed pixel would defeat the early exit below.)
        int n_corrected_pixels = 0;

        for(int y = 0; y < HEIGHT; y++)
        {
            for(int x = 0; x < WIDTH; x++)
            {
                ResultPixel& destresult = result_arr[y][x];

                if(!destresult.consistent)
                {
                    // Skip the inconsistent pixels
                    m_tempResult_p[y][x] = destresult;
                    continue;
                }

                ResultPixel* neighbors[8];
                int n_neighbors = 0;
                int n_inconsistent_neighbors = 0;

                // Collect all the in-image neighbors, keeping only the
                // consistent ones as median candidates.
                for(int i = 0; i < 8; i++)
                {
                    int x2 = dir_dx[i] + x;
                    int y2 = dir_dy[i] + y;

                    if(x2 >= 0 && x2 < WIDTH && y2 >= 0 && y2 < HEIGHT)
                    {
                        ResultPixel* neighbor = &(result_arr[y2][x2]);
                        if(neighbor->consistent) neighbors[n_neighbors++] = neighbor;
                        else n_inconsistent_neighbors ++;
                    }
                }

                // Mostly surrounded by inconsistent pixels: distrust this one too.
                if(n_inconsistent_neighbors > n_neighbors)
                {
                    m_tempResult_p[y][x] = destresult;
                    m_tempResult_p[y][x].consistent = false;
                    n_corrected_pixels++;
                    continue;
                }

                if(n_neighbors <= 0)
                {
                    m_tempResult_p[y][x] = destresult;
                    continue;
                }

                // Selection sort of the (at most eight) neighbors by
                // ascending subpixel_depth.  The strict '<' keeps the first
                // of several equal minima, which fixes the tie order.
                for(int i = 0; i < n_neighbors-1; i++)
                {
                    float min_value = neighbors[i]->subpixel_depth;
                    int min_item = i;
                    for(int j = i+1; j < n_neighbors; j++)
                    {
                        if(neighbors[j]->subpixel_depth < min_value)
                        {
                            min_value = neighbors[j]->subpixel_depth;
                            min_item = j;
                        }
                    }
                    ResultPixel* temp = neighbors[min_item];
                    neighbors[min_item] = neighbors[i];
                    neighbors[i] = temp;
                }

                // Upper median when the neighbor count is even.
                ResultPixel* median = neighbors[n_neighbors/2];

                if(absint(median->depth - destresult.depth) > thresh)
                {
                    m_tempResult_p[y][x] = *median;
                    n_corrected_pixels++;
                }
                else
                {
                    m_tempResult_p[y][x] = destresult;
                }
            }
        }

        // Nothing changed: m_tempResult_p equals result_arr, so neither the
        // copy-back nor any further pass can alter the result.
        if(n_corrected_pixels == 0) return;

        // TODO: Would be nice if I could just swap the two arrays here.
        for(int y = 0; y < HEIGHT; y++)
            for(int x = 0; x < WIDTH; x++)
            {
                result_arr[y][x] = m_tempResult_p[y][x];
            }
    }
}

// Fill inconsistent pixels from their consistent neighbours.
//
// In each pass every inconsistent pixel that has at least one consistent
// 8-neighbour takes over depth, error, subpixel_correction and
// subpixel_depth from the median neighbour (ordered by subpixel_depth; upper
// median for an even count).  The filled pixels are only marked consistent
// once the whole pass is finished, so a pixel filled in this pass never
// serves as a source for another pixel in the same pass.
//
//   side  - selects the result array (via get_result_arr).
//   steps - maximum number of passes; filling stops earlier once a pass
//           fills nothing.
//
// TODO: There should be a way to get rid of the streaks that
// result from the median fill.
void SGM::medianFill(Side side, int steps)
{
    ResultPixel (*pixels)[WIDTH] = get_result_arr(side);

    // Pixels filled during the current pass, awaiting their consistent flag.
    SmartArray<ResultPixel*> filled;

    for(int pass = 0; pass < steps; pass++)
    {
        for(int row = 0; row < HEIGHT; row++)
        {
            for(int col = 0; col < WIDTH; col++)
            {
                ResultPixel& hole = pixels[row][col];

                // Only inconsistent pixels are candidates for filling.
                if(!hole.consistent)
                {
                    ResultPixel* candidates[8];
                    int n_candidates = 0;

                    // Gather the consistent neighbours that lie inside the image.
                    for(int dir = 0; dir < 8; dir++)
                    {
                        const int ncol = col + dir_dx[dir];
                        const int nrow = row + dir_dy[dir];

                        if(ncol < 0 || ncol >= WIDTH || nrow < 0 || nrow >= HEIGHT)
                            continue;

                        ResultPixel* candidate = &pixels[nrow][ncol];
                        if(candidate->consistent)
                        {
                            candidates[n_candidates] = candidate;
                            n_candidates++;
                        }
                    }

                    if(n_candidates > 0)
                    {
                        // Selection sort by ascending subpixel_depth; the
                        // strict '<' keeps the first of several equal minima.
                        for(int slot = 0; slot + 1 < n_candidates; slot++)
                        {
                            int lowest_at = slot;
                            float lowest = candidates[slot]->subpixel_depth;

                            for(int probe = slot + 1; probe < n_candidates; probe++)
                            {
                                if(candidates[probe]->subpixel_depth < lowest)
                                {
                                    lowest_at = probe;
                                    lowest = candidates[probe]->subpixel_depth;
                                }
                            }

                            ResultPixel* swapped = candidates[slot];
                            candidates[slot] = candidates[lowest_at];
                            candidates[lowest_at] = swapped;
                        }

                        // Copy the measurement fields only; the consistent
                        // flag is updated after the pass.
                        const ResultPixel* source = candidates[n_candidates/2];
                        hole.depth = source->depth;
                        hole.error = source->error;
                        hole.subpixel_correction = source->subpixel_correction;
                        hole.subpixel_depth = source->subpixel_depth;
                        filled.pushAutoSize(&hole);
                    }
                }
            }
        }

        // Nothing could be filled: later passes would not find anything either.
        if(filled.size == 0) break;

        // Promote this pass's fills so the next pass can build on them.
        for(int k = 0; k < filled.size; k++)
        {
            filled[k]->consistent = true;
        }
        filled.size = 0;
    }
}

void* sgm_setup(int height, int width, int min_disp, int max_disp, bool sixteen_dirs)
{
    SGM* sgm = new SGM();

    assert(height == HEIGHT);
    assert(width == WIDTH);
    assert(min_disp == MIN_DISP);
    assert(max_disp == MAX_DISP);

    sgm->initDataStructs(false);
    sgm->initTodos(sixteen_dirs, true);

    return (void*)sgm;
}

// Run the complete stereo pipeline on one image pair.
//
// Steps, in order: optional colour correction (Left only), then for each
// side calcDifferences -> optional calcColorPenalty -> semiGlobalSmoothing
// -> extractResults, followed by the left/right consistency check and the
// optional median filter and median fill on both sides.
//
//   context          - pointer obtained from sgm_setup.
//   left_image,
//   right_image      - image buffers, reinterpreted as rows of RGB[WIDTH].
//   left_results,
//   right_results    - out: receive the pointers returned by
//                      get_result_arr(Left/Right); the caller must not free
//                      them.
//   do_color_correct - run colorCorrect(Left) first.
//   do_median_filter - run medianFilter on both sides.
//   do_median_fill   - run medianFill (up to 500 passes) on both sides.
//   penalty1..3      - smoothing penalties; penalty3 > 0 additionally
//                      enables calcColorPenalty.
//   default_err,
//   cutoff,
//   cost_style       - forwarded to calcDifferences.
//
// The function returns nothing.  If any pointer argument is NULL an error is
// printed, the output pointers (where available) are set to NULL and no
// processing takes place.
//
// Each step is echoed on stdout before it runs (trace output).
void sgm_stereo (void* context, 
                 char* left_image, 
                 char* right_image, 
                 ResultPixel** left_results, 
                 ResultPixel** right_results, 
                 bool do_color_correct, 
                 bool do_median_filter, 
                 bool do_median_fill, 
                 errt penalty1, 
                 errt penalty2, 
                 errt penalty3, 
                 errt default_err, 
                 CostStyle cost_style, 
                 errt cutoff)
{
    // Reject NULL arguments up front instead of crashing deep in the pipeline.
    if(context == NULL || left_image == NULL || right_image == NULL ||
       left_results == NULL || right_results == NULL)
    {
        printf("Error in sgm_stereo: NULL argument\n");
        if(left_results != NULL) *left_results = NULL;
        if(right_results != NULL) *right_results = NULL;
        return;
    }

    printf("SGM* sgm = (SGM*)context;\n");
    SGM* sgm = (SGM*)context;

    printf("sgm->set_img(Left, (RGB (*)[WIDTH])left_image);\n");
    sgm->set_img(Left, (RGB (*)[WIDTH])left_image);
    printf("sgm->set_img(Right, (RGB (*)[WIDTH])right_image);\n");
    sgm->set_img(Right, (RGB (*)[WIDTH])right_image);

    if(do_color_correct)
    {
        printf("sgm->colorCorrect(Left);\n");
        sgm->colorCorrect(Left);
    }

    // Left side.
    printf("sgm->calcDifferences(Left, default_err, cutoff, cost_style);\n");
    sgm->calcDifferences(Left, default_err, cutoff, cost_style);
    printf("if(penalty3 > 0) sgm->calcColorPenalty(Left, penalty2, penalty3);\n");
    if(penalty3 > 0) sgm->calcColorPenalty(Left, penalty2, penalty3);
    printf("sgm->semiGlobalSmoothing(penalty1, penalty2, penalty3 > 0);\n");
    sgm->semiGlobalSmoothing(penalty1, penalty2, penalty3 > 0);
    printf("sgm->extractResults(Left, Left);\n");
    sgm->extractResults(Left, Left);

    // Right side.
    printf("sgm->calcDifferences(Right, default_err, cutoff, cost_style);\n");
    sgm->calcDifferences(Right, default_err, cutoff, cost_style);
    printf("if(penalty3 > 0) sgm->calcColorPenalty(Right, penalty2, penalty3);\n");
    if(penalty3 > 0) sgm->calcColorPenalty(Right, penalty2, penalty3);
    printf("sgm->semiGlobalSmoothing(penalty1, penalty2, penalty3 > 0);\n");
    sgm->semiGlobalSmoothing(penalty1, penalty2, penalty3 > 0);
    printf("sgm->extractResults(Right, Right);\n");
    sgm->extractResults(Right, Right);

    // Post-processing on both result arrays.
    printf("sgm->consistencyCheck();\n");
    sgm->consistencyCheck();
    if(do_median_filter)
    {
        printf("sgm->medianFilter(Left);\n");
        sgm->medianFilter(Left);
        printf("sgm->medianFilter(Right);\n");
        sgm->medianFilter(Right);
    }

    if(do_median_fill)
    {
        printf("sgm->medianFill(Left, 500);\n");
        sgm->medianFill(Left, 500);
        printf("sgm->medianFill(Right, 500);\n");
        sgm->medianFill(Right, 500);
    }

    // Hand the result arrays back as flat ResultPixel pointers.
    printf("left_results  = (ResultPixel*)sgm->get_result_arr(Left);\n");
    *left_results  = (ResultPixel*)sgm->get_result_arr(Left);
    printf("right_results = (ResultPixel*)sgm->get_result_arr(Right);\n");
    *right_results = (ResultPixel*)sgm->get_result_arr(Right);
}

void sgm_free(void* context)
{
    delete (SGM*)context;
}
