#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>

#include "open_gl.hh"
#include "imageio.hh"
#include "poisson.hh"
#include "window.hh"
#include "cuda_helpers.hh"

static PixelBufferObject * s_fg = 0;         ///< the foreground pixel buffer (loaded in main)
static PixelBufferObject * s_bg = 0;         ///< the background pixel buffer (loaded in main)
static uchar4 * s_bg_sub_img = 0;            ///< subrectangle of the bg where the fg is (cudaMalloc'd in main, one uchar4 per fg pixel)
static uint2 s_offset = make_uint2(0, 0);    ///< offset of foreground to background (updated in motion)
static bool* p_mask = 0;                     ///< the mask for poisson blending (host buffer returned by loadPoissonMask)

static const dim3 s_block_size(16, 32, 1);	 ///< block size for computation
static dim3 s_n_blocks(0, 0, 0);             ///< grid size for kernel launches; computed in main from the fg size

static int2 s_last_mouse_pos;                ///< the last mouse position (written by mouse and motion)

/// The phases of the interactive workflow; s_program_state holds the
/// current one and is dispatched on in display().
enum {
	STATE_MOVING,             ///< The user is moving the foreground texture.
	STATE_GRAPH_CUTTING,      ///< Entered on mouse release; graph cuts is not implemented yet, so display() forwards this state straight to Poisson blending.
	STATE_POISSON_BLENDING    ///< The poisson blending algorithm is running.
} s_program_state;

/// Copy the background pixels lying under the foreground image into a
/// foreground-sized buffer.
///
/// Each thread handles the single pixel whose coordinate is returned by
/// get_coord().  Both the foreground coordinate and the derived background
/// coordinate go through CHECK_OUT_OF_BOUNDS before any memory is touched
/// (presumably an early return for out-of-range threads -- see
/// cuda_helpers.hh).
__global__
void extractBGSubImageKernel(
	uchar4 * bg,         ///< [in]  background image
	uint2 bg_size,      ///< [in]  background image size
	uint2 fg_size,      ///< [in]  foreground image size
	uint2 offset,       ///< [in]  foreground to background offset
	uchar4 * bg_sub_img  ///< [out] rectangular subset of background
	)
{
	// destination pixel, expressed in foreground coordinates
	uint2 dst = get_coord();
	CHECK_OUT_OF_BOUNDS(dst, fg_size);

	// matching source pixel, expressed in background coordinates
	uint2 src = dst + offset;
	CHECK_OUT_OF_BOUNDS(src, bg_size);

	// flatten both coordinates and copy the pixel across
	bg_sub_img[to_indx(dst, fg_size)] = bg[to_indx(src, bg_size)];
}

/// Render the foreground image as a textured quad.
///
/// The quad spans (0,0)-(width,height) in the current coordinate system, so
/// the caller positions it by setting up the transform beforehand.
void displayForegroundImage()
{
	s_fg->bindTexture();

	// the quad is exactly as large as the image, in pixels
	const int quad_w = s_fg->getWidth();
	const int quad_h = s_fg->getHeight();

	// corners are emitted starting at the origin: (0,0), (w,0), (w,h), (0,h)
	glBegin(GL_QUADS);
	glTexCoord2f(0.0f, 0.0f); glVertex2i(0,      0);
	glTexCoord2f(1.0f, 0.0f); glVertex2i(quad_w, 0);
	glTexCoord2f(1.0f, 1.0f); glVertex2i(quad_w, quad_h);
	glTexCoord2f(0.0f, 1.0f); glVertex2i(0,      quad_h);
	glEnd();
}

/// Switch to the graph cuts state.
///
/// Copies the part of the background currently covered by the foreground
/// (at s_offset) into s_bg_sub_img, then sets the program state to
/// STATE_GRAPH_CUTTING.  A failed kernel launch is reported on stderr; the
/// state change still happens, as it did before the check was added.
void switchToGraphCuts()
{
	// write the background subimage
	{
		// the mapping only needs to live for the duration of the launch
		PixelBufferObject::MemoryMap map(s_bg->mapDeviceMemory());
		extractBGSubImageKernel<<<s_n_blocks, s_block_size>>>(
				map.getPointer(), s_bg->getSize(), s_fg->getSize(),
				s_offset, s_bg_sub_img);

		// A kernel launch returns nothing by itself; an invalid launch
		// configuration (e.g. a zero-sized grid) only shows up here.
		const cudaError_t launch_status = cudaGetLastError();
		if (launch_status != cudaSuccess) {
			fprintf(stderr, "extractBGSubImageKernel launch failed: %s\n",
				cudaGetErrorString(launch_status));
		}
	}
	// don't do anything else, since we aren't improving the mask
	// with graph cuts yet
	s_program_state = STATE_GRAPH_CUTTING;
}

/// Switch to the poisson blending state.
///
/// Uploads the host mask (p_mask) to a temporary device buffer, hands it to
/// poissonInit() together with the background subimage and the mapped
/// foreground, frees the temporary buffer and sets the program state to
/// STATE_POISSON_BLENDING.
///
/// If the mask cannot be allocated or copied, the error is reported on
/// stderr, poissonInit() is not called, and the program falls back to
/// STATE_MOVING so the unblended foreground keeps being displayed.
void switchToPoissonBlending()
{
	printf("Switching to Poisson blending\n");

	// one mask entry per foreground pixel; the same byte count is used for
	// both the allocation and the copy so the two can never disagree
	const size_t n_elts = (size_t) s_fg->getWidth() * (size_t) s_fg->getHeight();
	const size_t mask_bytes = n_elts * sizeof(bool);

	bool* c_mask = 0;
	cudaError_t status = cudaMalloc((void **) &c_mask, mask_bytes);
	if (status == cudaSuccess) {
		status = cudaMemcpy(c_mask, p_mask, mask_bytes, cudaMemcpyHostToDevice);
	}
	if (status != cudaSuccess) {
		fprintf(stderr, "Failed to upload the Poisson mask: %s\n",
			cudaGetErrorString(status));
		cudaFree(c_mask);   // c_mask is still 0 when the allocation failed
		s_program_state = STATE_MOVING;
		return;
	}

	// initialize the poisson blending
	{
		PixelBufferObject::MemoryMap map(s_fg->mapDeviceMemory());
		poissonInit(s_bg_sub_img, map.getPointer(), c_mask);
	}

	// free the mask
	cudaFree(c_mask);

	// update the program state
	s_program_state = STATE_POISSON_BLENDING;
}

/// The display callback.
///
/// Draws the background as a full-size textured quad, then performs the
/// action for the current program state with the foreground offset applied
/// as a translation.  A new frame is requested unconditionally at the end.
void display()
{
	// start from a cleared color buffer
	glClear(GL_COLOR_BUFFER_BIT);

	// background: one textured quad covering (0,0)-(width,height)
	s_bg->bindTexture();
	const int bg_w = s_bg->getWidth();
	const int bg_h = s_bg->getHeight();
	glBegin(GL_QUADS);
	glTexCoord2f(0.0f, 0.0f); glVertex2i(0,    0);
	glTexCoord2f(1.0f, 0.0f); glVertex2i(bg_w, 0);
	glTexCoord2f(1.0f, 1.0f); glVertex2i(bg_w, bg_h);
	glTexCoord2f(0.0f, 1.0f); glVertex2i(0,    bg_h);
	glEnd();

	// everything between push and pop is shifted by the foreground offset
	glPushMatrix();
	glTranslatef(s_offset.x, s_offset.y, 0);

	if (s_program_state == STATE_MOVING) {
		// still dragging: show the plain foreground image
		displayForegroundImage();
	}
	else if (s_program_state == STATE_GRAPH_CUTTING) {
		// go straight to poisson, since we aren't graph cutting yet;
		// no foreground draw call is issued from here in this frame
		switchToPoissonBlending();
	}
	else if (s_program_state == STATE_POISSON_BLENDING) {
		// display the images
		poissonDisplay();
	}
	else {
		// we are in an undefined state
		assert(false);
	}

	glPopMatrix();

	// Don't forget to swap the buffers!
	glutSwapBuffers();

	// always ask for another frame
	glutPostRedisplay();
}

/// The reshape callback.
///
/// Makes the viewport cover the whole window and installs an orthographic
/// projection in which one unit is one pixel and the origin is the window's
/// lower-left corner.  GL_PROJECTION is left as the current matrix mode on
/// return.
void reshape(int width, int height) {
	// viewport spans the full window
	glViewport(0, 0, width, height);

	// reset the modelview matrix
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();

	// pixel-sized orthographic projection: x in [0,width], y in [0,height]
	const GLdouble right = width;
	const GLdouble top   = height;
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	glOrtho(0.0, right, 0.0, top, 0.0, 1.0);

	// the new geometry needs a redraw
	glutPostRedisplay();
}

/// The keyboard callback.
///
/// 'q', 'Q' and ESC reset the CUDA device and terminate the process with
/// exit status 0; every other key is ignored.
void keyboard(
	unsigned char key,	///< the key being pressed
	int x,				///< x coordinate of the mouse
	int y)				///< y coordinate of the mouse
{
	// the mouse position plays no role in key handling
	(void) x;
	(void) y;

	switch (key)
	{
		case 'q':
		case 'Q':
		case 27:	// ESC
			// cudaThreadExit() is deprecated; cudaDeviceReset() is its
			// documented replacement
			cudaDeviceReset();
			exit(0);
			break;
		default:
			break;
	}
}

/// The mouse callback.
///
/// A button press records the cursor position and (re)enters the moving
/// state; a button release hands over to switchToGraphCuts().  The button
/// identity is not inspected.
void mouse(
	int button, ///< which button was pressed
	int state,	///< up or down
	int x,		///< x position
	int y)		///< y position
{
	if (state == GLUT_DOWN) {
		// remember where the drag started so motion() can compute deltas
		s_last_mouse_pos = make_int2(x, y);
		s_program_state = STATE_MOVING;
		printf("Moving image\n");
		return;
	}

	if (state == GLUT_UP) {
		// the drag is over: freeze the placement and move on
		switchToGraphCuts();
		return;
	}

	// undefined state
	assert(false);
}

/// Mouse motion callback.
///
/// Moves the foreground offset by the cursor movement since the last event,
/// clamped between 0 and (background size - foreground size) on each axis,
/// and records the new cursor position.  Must only run in STATE_MOVING.
void motion(
	int x,		///< x coordinate of mouse
	int y)		///< y coordinate of mouse
{
	assert(s_program_state == STATE_MOVING);

	// cursor movement since the previous mouse event
	const int dx = x - s_last_mouse_pos.x;
	const int dy = y - s_last_mouse_pos.y;

	// largest offsets permitted by the clamp on each axis
	const int max_x = (int) (s_bg->getWidth() - s_fg->getWidth());
	const int max_y = (int) (s_bg->getHeight() - s_fg->getHeight());

	// the y delta is subtracted: the projection set up in reshape() has y
	// growing upward, the opposite of the incoming mouse y
	const int new_x = clamp(((int) s_offset.x) + dx, 0, max_x);
	const int new_y = clamp(((int) s_offset.y) - dy, 0, max_y);

	s_offset = make_uint2(new_x, new_y);
	s_last_mouse_pos = make_int2(x, y);
}

/// Load mask for Poisson blending.
///
/// Reads the image through loadImageRGBA() and converts it to one bool per
/// pixel: true where the first channel of the pixel is non-zero, false
/// otherwise.  Prints a message and terminates the process with status -1
/// if the image cannot be loaded or the mask cannot be allocated.
///
/// The returned buffer is malloc'd and holds width*height entries of the
/// mask image; the dimensions themselves are not returned, so the caller is
/// responsible for using a mask that matches the foreground size.
bool* loadPoissonMask(
	const char* maskFileName)    ///< file name of the mask
{
	int width = 0, height = 0;
	unsigned char* img_data = loadImageRGBA(maskFileName, &width, &height);
	if (!img_data || width <= 0 || height <= 0) {
		printf("Error loading mask file: %s\n", maskFileName);
		exit(-1);
	}

	// one bool per pixel; computed in size_t so large images cannot
	// overflow int arithmetic
	const size_t n_pixels = (size_t) width * (size_t) height;
	bool* mask_data = (bool*) malloc(n_pixels * sizeof(bool));
	if (!mask_data) {
		printf("Error allocating mask for file: %s\n", maskFileName);
		free(img_data);
		exit(-1);
	}

	// the source has 4 bytes per pixel; only the first one is inspected
	for (size_t pp = 0; pp < n_pixels; pp++) {
		mask_data[pp] = img_data[4 * pp] > 0;
	}

	free(img_data);

	return mask_data;
}

/// Program entry point.
///
/// Picks an image set ("pool", "eyes" or "mouth" as the single optional
/// argument; the pool set otherwise), creates the window, loads the
/// background, foreground and mask, sizes the kernel grid and the device
/// buffer for the background subimage from the foreground size, and starts
/// the application loop.
int main(int argc, char **argv)
{
	// image names; the pool set is the default
	const char* bckg_img_name = "pool-1024x512.png";
	const char* forg_img_name = "child-256x256.png";
	const char* mask_img_name = "child-256x256-mask.png";

	// load specific set of images; otherwise, use the pool
	if (argc == 2) {
		if (strcmp(argv[1], "pool") == 0) {
			// the defaults above already describe the pool set
		}
		else if (strcmp(argv[1], "eyes") == 0) {
			bckg_img_name = "pitt-512x512.png";
			forg_img_name = "zhang-ziyi-eyes-256x128.png";
			mask_img_name = "zhang-ziyi-eyes-256x128-mask.png";
		}
		else if (strcmp(argv[1], "mouth") == 0) {
			bckg_img_name = "pitt-512x512.png";
			forg_img_name = "zhang-ziyi-mouth-256x128.png";
			mask_img_name = "zhang-ziyi-mouth-256x128-mask-2.png";
		}
		else {
			// undefined set of images
			printf("Unknown set of images: %s; try \"pool\", \"eyes\", or \"mouth\"", argv[1]);
			exit(0);
		}
	}

	// have to create the OpenGL context before creating pixel buffer objects
	createWindow(argc, argv);

	// load the background and foreground images
	s_bg = PixelBufferObject::loadImage(bckg_img_name);
	s_fg = PixelBufferObject::loadImage(forg_img_name);
	if (!s_bg || !s_fg) {
		// everything below dereferences both buffers
		printf("Error loading image files: %s, %s\n", bckg_img_name, forg_img_name);
		exit(-1);
	}
	p_mask = loadPoissonMask(mask_img_name);
	reshapeWindow(s_bg->getWidth(), s_bg->getHeight());

	// create and init the subimage
	uint2 fg_size = s_fg->getSize();

	// Round the grid up so foreground sizes that are not a multiple of the
	// block size are still fully covered; extractBGSubImageKernel
	// bounds-checks its coordinates, so the surplus threads are harmless.
	s_n_blocks = dim3(
		(fg_size.x + s_block_size.x - 1) / s_block_size.x,
		(fg_size.y + s_block_size.y - 1) / s_block_size.y,
		1);

	const size_t n_elts = (size_t) fg_size.x * (size_t) fg_size.y;
	const cudaError_t alloc_status =
		cudaMalloc((void **) &s_bg_sub_img, n_elts * sizeof(uchar4));
	if (alloc_status != cudaSuccess) {
		fprintf(stderr, "Failed to allocate the background subimage: %s\n",
			cudaGetErrorString(alloc_status));
		exit(-1);
	}

	// print instructions
	printf("\nInstructions:\nClick and hold to drag the foreground image to the desired location.\nRelease the mouse button to start the Poisson blending.\n\n");

	// set the program state
	s_program_state = STATE_MOVING;

	// init the poisson subsystem
	poissonAllocateMemory(*s_fg);

	// start the application
	startApplication(argc, argv);
	return 0;
}
