#include "PixelBufferObject.hh"

#include <assert.h>
#include <stdio.h>
#include <cuda_gl_interop.h>

#include "cuda_err.hh"
#include "imageio.hh"

/**
 * Decodes an image file and uploads its pixels into a freshly created
 * PixelBufferObject.
 *
 * The file is decoded with loadImageRGBA() and copied, 4 bytes per pixel,
 * into the host-mapped pixel buffer. Any failure prints a message and
 * terminates the process with exit(-1), matching the error style used for
 * the load failure itself.
 *
 * @param file_name  path handed unchanged to loadImageRGBA().
 * @return a PixelBufferObject allocated with `new`; nothing in this function
 *         deletes it, so the caller is responsible for it.
 */
PixelBufferObject *
PixelBufferObject::loadImage(const char * file_name)
{
	// load the image
	int width = 0, height = 0;
	unsigned char * img_data = loadImageRGBA(file_name, &width, &height);
	if (!img_data) {
		printf("Error loading \"%s\".\n", file_name);
		exit(-1);
	}

	// The dimensions are converted to unsigned int below; a zero or negative
	// value would turn into an empty or absurdly large buffer request.
	if (width <= 0 || height <= 0) {
		printf("Error loading \"%s\": invalid image size %d x %d.\n",
			file_name, width, height);
		free(img_data);
		exit(-1);
	}

	// Compute the byte count in size_t so that large images cannot overflow
	// an int multiplication.
	const size_t num_bytes = (size_t) width * (size_t) height * 4;

	// create the pixel buffer object (operator new throws on failure, so no
	// NULL check is needed here)
	PixelBufferObject * pbo = new PixelBufferObject(width, height);

	// map the memory and then write the pixel buffer object; the map is
	// released when `map` goes out of scope at the end of this block
	{
		PixelBufferObject::MemoryMap map(pbo->mapHostMemory());
		void * dst = map.getPointer();
		printf("copying the memory: %p -> %p\n", (void *) img_data, dst);

		// The mapping is only asserted inside MemoryMap, which compiles out
		// under NDEBUG; never hand memcpy a NULL destination.
		if (!dst) {
			printf("Error mapping pixel buffer for \"%s\".\n", file_name);
			exit(-1);
		}
		memcpy(dst, img_data, num_bytes);
	}

	// all done
	// NOTE(review): released with free() as before; presumably
	// loadImageRGBA() allocates with malloc -- confirm against imageio.hh.
	free(img_data);
	return pbo;
}

/**
 * Creates an OpenGL pixel buffer object of width x height 4-channel 8-bit
 * pixels, registers it with CUDA, and creates a matching GL_RGBA8 2D texture.
 *
 * On return the new texture is bound to GL_TEXTURE_2D and no buffer is bound
 * to GL_PIXEL_UNPACK_BUFFER. The object starts out "dirty", so the first
 * bindTexture() call transfers the buffer contents into the texture.
 *
 * @param width   buffer/texture width in pixels.
 * @param height  buffer/texture height in pixels.
 */
PixelBufferObject::PixelBufferObject(
	const unsigned int & width,
	const unsigned int & height) :
	m_size(make_uint2(width, height)),
	m_pbo_id(0), m_texture_id(0), m_dirty(true)
{
	////////////////////////////////////////
	// Initialize the Pixel Buffer Object //
	////////////////////////////////////////

	// Generate a buffer ID called a PBO (Pixel Buffer Object)
	glGenBuffers(1, &m_pbo_id);
	assert(m_pbo_id);

	// Make this the current UNPACK buffer (OpenGL is state-based)
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pbo_id);

	// Allocate data for the buffer. 4-channel 8-bit image. The size is
	// computed in GLsizeiptr (the type glBufferData takes) rather than int,
	// so large images do not overflow the multiplication.
	const GLsizeiptr size_tex_data =
		static_cast<GLsizeiptr>(m_size.x) *
		static_cast<GLsizeiptr>(m_size.y) *
		static_cast<GLsizeiptr>(4 * sizeof(GLubyte));
	glBufferData(GL_PIXEL_UNPACK_BUFFER, size_tex_data, NULL, GL_DYNAMIC_COPY);

	// register this buffer object with CUDA
	exit_on_err(cudaGLRegisterBufferObject(m_pbo_id));

	// Release the unpack binding before touching the texture. While a buffer
	// is bound to GL_PIXEL_UNPACK_BUFFER, the data argument of glTexImage2D
	// is interpreted as an offset into that buffer, so NULL would upload the
	// buffer's uninitialized contents instead of meaning "no data".
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

	////////////////////////////
	// Initialize the Texture //
	////////////////////////////

	// Generate a texture identifier
	glGenTextures(1, &m_texture_id);
	assert(m_texture_id);

	// Make this the current texture (remember that GL is state-based)
	glBindTexture(GL_TEXTURE_2D, m_texture_id);

	// Allocate the texture memory. The last parameter is NULL since we only
	// want to allocate memory, not initialize it. The format/type pair
	// matches the one bindTexture() uploads with; without source data it
	// does not influence the result.
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, m_size.x, m_size.y, 0,
			GL_RGBA, GL_UNSIGNED_BYTE, NULL);

	// Must set the filter mode, GL_LINEAR enables interpolation when scaling
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

	// Note: GL_TEXTURE_RECTANGLE_ARB may be used instead of
	// GL_TEXTURE_2D for improved performance if linear interpolation is
	// not desired. Replace GL_LINEAR with GL_NEAREST in the
	// glTexParameteri() call
}
		
/**
 * Releases the CUDA registration, the OpenGL buffer object and the texture
 * owned by this object, then zeroes both ids.
 *
 * A failure to unregister from CUDA goes through exit_on_err().
 */
PixelBufferObject::~PixelBufferObject()
{
	//////////////////////////////////
	// Free the Pixel Buffer Object //
	//////////////////////////////////

	// unregister this buffer object with CUDA
	exit_on_err(cudaGLUnregisterBufferObject(m_pbo_id));

	// Delete this buffer in OpenGL. The buffer does not need to be bound to
	// be deleted; binding it to GL_ARRAY_BUFFER first (as was done before)
	// only discarded whatever array buffer the caller had bound.
	glDeleteBuffers(1, &m_pbo_id);

	// Set back to zero. The ids are GLuint values, so use 0 rather than the
	// pointer constant NULL.
	m_pbo_id = 0;

	//////////////////////
	// Free the Texture //
	//////////////////////

	glDeleteTextures(1, &m_texture_id);
	m_texture_id = 0;
}
	
const unsigned int & PixelBufferObject::getWidth() const
{
	return m_size.x;
}

const unsigned int & PixelBufferObject::getHeight() const
{
	return m_size.y;
}

// Size accessor: yields a const reference to the stored (width, height)
// pair, with width in .x and height in .y.
const uint2 & PixelBufferObject::getSize() const
{
	return this->m_size;
}

// Id accessor: yields a const reference to the OpenGL name of the pixel
// buffer object (not the texture id).
const GLuint & PixelBufferObject::getID() const
{
	return this->m_pbo_id;
}

// Maps the pixel buffer for access from the host. The returned MemoryMap
// performs the mapping in its constructor and undoes it in its destructor,
// so the mapping lasts exactly as long as the returned object.
PixelBufferObject::MemoryMap PixelBufferObject::mapHostMemory()
{
	return MemoryMap(*this, HOST_MEMORY);
}

// Maps the pixel buffer for access through CUDA. The returned MemoryMap
// performs the mapping in its constructor and undoes it in its destructor,
// so the mapping lasts exactly as long as the returned object.
PixelBufferObject::MemoryMap PixelBufferObject::mapDeviceMemory()
{
	return MemoryMap(*this, DEVICE_MEMORY);
}

/**
 * Binds this object's texture to GL_TEXTURE_2D and, if the pixel buffer has
 * been memory mapped since the last call, first copies the buffer contents
 * into the texture.
 *
 * On return no buffer is bound to GL_PIXEL_UNPACK_BUFFER when an upload took
 * place; when nothing was dirty the unpack binding is left untouched.
 */
void PixelBufferObject::bindTexture()
{
	// bind texture from PBO
	glBindTexture(GL_TEXTURE_2D, m_texture_id);

	// Nothing to transfer unless the pixel buffer has been accessed
	// (i.e. memory mapped) since the last upload.
	if (!m_dirty) {
		return;
	}

	// Source the texture upload from the pixel buffer object.
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pbo_id);

	// Note: glTexSubImage2D will perform a format conversion if the
	// buffer is a different format from the texture. The texture was
	// created with internal format GL_RGBA8 and the buffer is read here as
	// GL_RGBA / GL_UNSIGNED_BYTE, i.e. 4 bytes per pixel.

	// Note: with a buffer bound to GL_PIXEL_UNPACK_BUFFER the last argument
	// is an offset into that buffer, so NULL means "start of the buffer".
	glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_size.x, m_size.y,
		GL_RGBA, GL_UNSIGNED_BYTE, NULL);

	// Release the unpack binding, as MemoryMap's destructor does after a
	// host mapping, so that later pixel transfers elsewhere are not
	// silently sourced from this buffer.
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

	// The texture and pixel buffer object are now in synch.
	m_dirty = false;
}

/**
 * Maps the parent's pixel buffer either into host memory (through OpenGL)
 * or into CUDA's address space, and stores the resulting pointer.
 *
 * Marks the parent as dirty so that its next bindTexture() call re-uploads
 * the buffer into the texture.
 *
 * @param parent    pixel buffer object whose storage is mapped.
 * @param map_type  HOST_MEMORY or DEVICE_MEMORY; anything else trips an
 *                  assertion.
 */
PixelBufferObject::MemoryMap::MemoryMap(
	PixelBufferObject & parent, MemoryMapType map_type) :
		m_parent(parent), m_map_type(map_type),
		m_pointer(NULL)
{
	// The buffer contents are about to be touched, so the texture copy is
	// considered stale from here on.
	m_parent.m_dirty = true;
	assert(!m_pointer);

	switch (m_map_type) {
	case HOST_MEMORY:
		// Bind the PBO as the unpack buffer and map it, write-only, into
		// the client's address space. It stays bound until the destructor.
		glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, m_parent.m_pbo_id);
		m_pointer = (uchar4 *) glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY_ARB);
		assert(m_pointer);
		break;

	case DEVICE_MEMORY:
		// Ask CUDA for a pointer to the registered buffer object, using
		// stream 0. While the buffer is mapped to CUDA, OpenGL should not
		// use it.
		// TODO: decide whether the async variant or the synchronous
		// cudaGLMapBufferObject is the right call here.
		exit_on_err(cudaGLMapBufferObjectAsync((void**) &m_pointer, m_parent.m_pbo_id, 0));
		assert(m_pointer);
		break;

	default:
		// memory type must be either host or device
		assert(false);
		break;
	}
}
	
/**
 * Ends the mapping established by the constructor and invalidates the
 * stored pointer.
 *
 * For a host mapping the parent's buffer is unmapped through OpenGL and the
 * GL_PIXEL_UNPACK_BUFFER binding is reset to 0; for a device mapping the
 * buffer is unmapped from CUDA on stream 0 (errors go through exit_on_err()).
 */
PixelBufferObject::MemoryMap::~MemoryMap()
{
	assert(m_parent.m_dirty);

	if (m_map_type == HOST_MEMORY) {

		// Re-bind the parent's buffer before unmapping: glUnmapBufferARB
		// acts on whatever is bound to the target right now, and that may
		// no longer be this buffer if other code changed the binding while
		// the map was alive.
		glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, m_parent.m_pbo_id);

		// release the mapped buffer; a GL_FALSE result means the unmap did
		// not succeed, which is worth reporting rather than dropping
		if (glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB) != GL_TRUE) {
			fprintf(stderr,
				"Warning: unmapping pixel buffer %u failed; "
				"its contents may be invalid.\n",
				(unsigned int) m_parent.m_pbo_id);
		}

		// it is good idea to release PBOs with ID 0 after use.
		// Once bound with 0, all pixel operations are back to normal ways.
		glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

	} else if (m_map_type == DEVICE_MEMORY) {

		// Unmap the buffer object from CUDA on stream 0, mirroring the
		// async map call in the constructor.
		// TODO: decide whether the async variant or the synchronous
		// cudaGLUnmapBufferObject is the right call here.
		exit_on_err(cudaGLUnmapBufferObjectAsync(m_parent.m_pbo_id, 0));

	} else {

		// memory type must be either host or device
		assert(false);

	}

	// the map is over and the pointer is no longer valid
	m_pointer = NULL;
}


// Yields the pointer obtained when the buffer was mapped. The destructor
// resets it to NULL, so it is only meaningful while this MemoryMap is alive.
uchar4 * PixelBufferObject::MemoryMap::getPointer()
{
	return this->m_pointer;
}