// simpleGLmain.cpp (Rob Farber)
// http://www.drdobbs.com/architecture-and-design/222600097

// open gl
#include "open_gl.hh"

// cuda utility libraries
#include <cuda_runtime.h>
#include <cutil.h>
#include <cutil_inline.h>
#include <cutil_gl_inline.h>
#include <cutil_gl_error.h>
#include <cuda_gl_interop.h>
// #include <rendercheck_gl.h>

// the kernel
#include "simple_kernel.hh"

static GLuint pbo = 0;		 ///< global pixel buffer object ID; 0 while no buffer exists
static GLuint textureID = 0; ///< global texture ID; 0 while no texture exists

// The user must create the following routines (initCuda and runCuda are
// implemented further down in this file):
// CUDA methods
extern void initCuda(int argc, char** argv);
extern void runCuda();
extern void renderCuda(int);

// GLUT callbacks; registered in initGL() and createWindow()
extern void display();
extern void keyboard(unsigned char key, int x, int y);
extern void mouse(int button, int state, int x, int y);
extern void motion(int x, int y);

// GLUT specific variables: initial window size, also used for the viewport
unsigned int window_width = 512;
unsigned int window_height = 512;

// cutil timer handle for FPS calculations; created in createWindow()
unsigned int timer = 0; // a timer for FPS calculations

// Forward declarations of GL functionality
CUTBoolean initGL(int argc, char** argv);

// Run the Cuda part of the computation
void runCuda()
{
	uchar4 *dptr=NULL;

	// map OpenGL buffer object for writing from CUDA on a single GPU
	// no data is moved (Win & Linux). When mapped to CUDA, OpenGL
	// should not use this buffer
	cudaGLMapBufferObject((void**)&dptr, pbo);

	// execute the kernel
	const int image_width = 512;
	const int image_height = 512;
	launch_kernel(dptr, image_width, image_height);

	// unmap buffer object
	cudaGLUnmapBufferObject(pbo);
}

/// Creates a Pixel Buffer Object and registers it with CUDA.
///
/// Storage for a 4-channel, 8-bit-per-channel image of the given size is
/// allocated without initial data on the GL_PIXEL_UNPACK_BUFFER target. The
/// new buffer is still bound to that target when the function returns.
/// Does nothing when \p pbo is NULL.
void createPBO(
	GLuint* pbo,			///< [out] receives the ID of the pixel buffer object
	const int image_width,	///< width of the object (power of 2)
	const int image_height) ///< height of the object (power of 2)
{
	if (!pbo) {
		return;
	}

	// set up vertex data parameter: four 8-bit values per texel
	const int num_texels = image_width * image_height;
	const int num_values = num_texels * 4;
	const int size_tex_data = sizeof(GLubyte) * num_values;

	// Generate a buffer ID called a PBO (Pixel Buffer Object)
	glGenBuffers(1, pbo);
	// Make this the current UNPACK buffer (OpenGL is state-based)
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, *pbo);
	// Allocate data for the buffer. 4-channel 8-bit image
	glBufferData(GL_PIXEL_UNPACK_BUFFER, size_tex_data, NULL, GL_DYNAMIC_COPY);

	// Register the buffer with CUDA. The result is checked so that a failed
	// registration is reported here instead of surfacing later when the
	// buffer is mapped.
	cutilSafeCall(cudaGLRegisterBufferObject(*pbo));
}

/// Deletes the pixel buffer object with the given ID.
///
/// The buffer is unregistered from CUDA, deleted from OpenGL, and \p *pbo is
/// reset to 0. Does nothing when \p pbo is NULL.
///
/// The CUDA return code is intentionally not made fatal here: this function
/// is reached from cleanupCuda(), which is installed as an atexit() handler,
/// and calling exit() from inside such a handler is not allowed.
void deletePBO(GLuint* pbo)
{
	if (!pbo) {
		return;
	}

	// unregister this buffer object with CUDA
	cudaGLUnregisterBufferObject(*pbo);

	// NOTE(review): binding the buffer right before deleting it looks
	// unnecessary; kept as-is to preserve the GL call sequence - confirm
	// before removing.
	glBindBuffer(GL_ARRAY_BUFFER, *pbo);
	glDeleteBuffers(1, pbo);

	// GLuint is an integer handle, so reset it with 0 rather than NULL
	*pbo = 0;
}

/// Create an OpenGL Texture
///
/// Enables 2D texturing, generates a texture, binds it to GL_TEXTURE_2D,
/// allocates RGBA8 storage of the requested size and selects linear
/// filtering. Does nothing when \p textureID is NULL, matching the guard in
/// createPBO().
void createTexture(
	GLuint* textureID,		   ///< [out] receives the identifier for the texture
	unsigned int image_width,  ///< width of the texture
	unsigned int image_height) ///< height of the texture
{
	if (!textureID) {
		return;
	}

	// Enable Texturing
	glEnable(GL_TEXTURE_2D);

	// Generate a texture identifier
	glGenTextures(1, textureID);

	// Make this the current texture (remember that GL is state-based)
	glBindTexture(GL_TEXTURE_2D, *textureID);

	// Allocate the texture memory. No client data is supplied (NULL).
	// Note: while a buffer is bound to GL_PIXEL_UNPACK_BUFFER (createPBO()
	// leaves its buffer bound), OpenGL treats this NULL as byte offset 0 into
	// that buffer rather than as "no data".
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, image_width, image_height, 0,
			GL_BGRA, GL_UNSIGNED_BYTE, NULL);

	// Must set the filter mode, GL_LINEAR enables interpolation when scaling
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	// Note: GL_TEXTURE_RECTANGLE_ARB may be used instead of
	// GL_TEXTURE_2D for improved performance if linear interpolation is
	// not desired. Replace GL_LINEAR with GL_NEAREST in the
	// glTexParameteri() call
}

/// Deletes the texture with the given ID.
///
/// The texture is deleted from OpenGL and \p *tex is reset to 0. Does
/// nothing when \p tex is NULL, matching the guard in deletePBO().
void deleteTexture(GLuint* tex)
{
	if (!tex) {
		return;
	}

	glDeleteTextures(1, tex);

	// GLuint is an integer handle, so reset it with 0 rather than NULL
	*tex = 0;
}

/// Releases the global pixel buffer object and the global texture.
///
/// Each resource is only deleted when its ID is non-zero. Installed as an
/// atexit() handler by initCuda().
void cleanupCuda()
{
	printf("Cleaning up CUDA\n");

	if (pbo != 0) {
		deletePBO(&pbo);
	}

	if (textureID != 0) {
		deleteTexture(&textureID);
	}
}

// First initialize OpenGL context, so we can properly set the GL
// for CUDA. NVIDIA notes this is necessary in order to achieve
// optimal performance with OpenGL/CUDA interop.	use command-line
// specified CUDA device, otherwise use device with highest Gflops/s
void initCuda(int argc, char** argv)
{
	const int image_width = 512;
	const int image_height = 512;
	
	if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) {
		cutilGLDeviceInit(argc, argv);
	} else {
		cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
	}
 
	createPBO(&pbo, image_width, image_height);
	createTexture(&textureID, image_width, image_height);

	// Clean up on program exit
	atexit(cleanupCuda);

	runCuda();
}

/// Shows the frames per second in the window title.
///
/// Counts calls; on every 100th call the title is rebuilt from the average
/// value of the global timer, the counter is cleared and the timer is reset.
void computeFPS()
{
	static int framesSinceUpdate = 0;
	static int framesPerUpdate = 100;

	++framesSinceUpdate;
	if (framesSinceUpdate != framesPerUpdate) {
		return;
	}

	// The average is divided by 1000 before inverting - presumably the timer
	// reports milliseconds; confirm against the cutil documentation.
	const float averageTime = cutGetAverageTimerValue(timer);
	const float ifps = 1.f / (averageTime / 1000.f);

	char title[256];
	sprintf(title, "15-668 - Project 1: %3.1f fps ", ifps);
	glutSetWindowTitle(title);

	framesSinceUpdate = 0;
	cutilCheckError(cutResetTimer(timer));
}

/// GLUT display callback: wraps display() with the global FPS timer.
///
/// Only the time spent inside display() is measured; computeFPS() is then
/// called to update the window title from the accumulated timings.
void fpsDisplay()
{
	// start timing this frame
	cutilCheckError(cutStartTimer(timer));	
	
	display();
	
	// stop timing before the title update so that it is not measured
	cutilCheckError(cutStopTimer(timer));
	computeFPS();
}

/// Creates the FPS timer, the window with its OpenGL context and the CUDA
/// resources, then registers the GLUT callbacks.
///
/// \return CUTFalse when initGL() fails, CUTTrue otherwise.
CUTBoolean createWindow(int argc, char ** argv)
{
	// Create the CUTIL timer
	cutilCheckError( cutCreateTimer( &timer));

	// The GL context has to exist before CUDA is initialised for interop
	if (CUTFalse == initGL(argc, argv)) {
		return CUTFalse;
	}

	initCuda(argc, argv);
	CUT_CHECK_ERROR_GL();

	// register callbacks
	glutDisplayFunc(fpsDisplay);
	glutKeyboardFunc(keyboard);
	glutMouseFunc(mouse);
	glutMotionFunc(motion);

	// The success path previously fell off the end of this non-void
	// function, which is undefined behaviour; report success explicitly.
	return CUTTrue;
}

/// Enters the GLUT main loop, then shuts CUDA down and exits through cutil.
void startApplication(int argc, char ** argv)
{
	// start rendering mainloop
	// NOTE(review): classic GLUT's glutMainLoop() does not return; if that is
	// the implementation in use, the clean-up below is never reached - confirm.
	glutMainLoop();
	
	// clean up
	cudaThreadExit();
	cutilExit(argc, argv);
}

/// Creates the GLUT window with its OpenGL context and sets up GL state.
///
/// Registers the display, keyboard and motion callbacks, initialises GLEW,
/// checks for OpenGL 2.0 support, sets a viewport covering the window,
/// disables depth testing and installs an orthographic projection over the
/// unit square.
///
/// \return CUTTrue on success; CUTFalse when GLEW cannot be initialised or
///         the required OpenGL support is missing.
CUTBoolean initGL(int argc, char **argv)
{
	//Steps 1-2: create a window and GL context (also register callbacks)
	glutInit(&argc, argv);
	glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
	glutInitWindowSize(window_width, window_height);
	glutCreateWindow("15-668 - Project 1");
	glutDisplayFunc(fpsDisplay);
	glutKeyboardFunc(keyboard);
	glutMotionFunc(motion);

	// Initialise GLEW and make sure that succeeded before querying it; the
	// result was previously ignored.
	const GLenum glewStatus = glewInit();
	if (GLEW_OK != glewStatus) {
		fprintf(stderr, "ERROR: glewInit failed: %s\n", glewGetErrorString(glewStatus));
		return CUTFalse;
	}

	// check for necessary OpenGL extensions
	if (! glewIsSupported( "GL_VERSION_2_0 " ) ) {
		fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing.\n");
		return CUTFalse;
	}

	// Step 3: Setup our viewport and viewing modes
	glViewport(0, 0, window_width, window_height);

	glClearColor(0.0, 0.0, 0.0, 1.0);
	glDisable(GL_DEPTH_TEST);

	// set view matrix
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();

	// orthographic projection over the unit square
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	glOrtho(0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f);

	return CUTTrue;
}

/// Returns the ID of the global pixel buffer object (0 while none exists).
GLuint getPBO()
{
	return pbo;
}

/// Returns the ID of the global texture (0 while none exists).
GLuint getTextureID()
{
	return textureID;
}

