diff --git a/libethash/internal.c b/libethash/internal.c
index a050d5b..dd17e21 100644
--- a/libethash/internal.c
+++ b/libethash/internal.c
@@ -171,6 +171,18 @@ bool ethash_compute_full_data(
 	return true;
 }
 
+/*
+ * Return the upper 64 bits of the full 128-bit product a * b.
+ *
+ * `static` is required here: a plain C99/C11 `inline` definition provides
+ * no external definition, so any call the compiler chooses not to inline
+ * would fail to link.  The widening multiply replaces the previous
+ * x86-64-only `mulq` inline asm with an equivalent portable expression.
+ */
+static inline uint64_t mulq_hi(uint64_t a, uint64_t b) {
+	return (uint64_t)(((__uint128_t)a * b) >> 64);
+}
+
 static bool ethash_hash(
 	ethash_return_value_t* ret,
 	node const* full_nodes,
@@ -202,8 +214,30 @@ static bool ethash_hash(
 	unsigned const page_size = sizeof(uint32_t) * MIX_WORDS;
 	unsigned const num_full_pages = (unsigned) (full_size / page_size);
 
+#if defined(_M_X64) && ENABLE_SSE
+	__m128i fnv_prime = _mm_set1_epi32(FNV_PRIME);
+#endif
+
+#define OPTIMIZED_MOD 1
+
+#if OPTIMIZED_MOD
+	__uint128_t recipBig = 1;
+	recipBig <<= 64;
+	recipBig += num_full_pages - 1;
+	recipBig /= num_full_pages;
+	uint64_t recip = (uint64_t)recipBig;
+#endif
+
+
 	for (unsigned i = 0; i != ETHASH_ACCESSES; ++i) {
-		uint32_t const index = fnv_hash(s_mix->words[0] ^ i, mix->words[i % MIX_WORDS]) % num_full_pages;
+#if OPTIMIZED_MOD
+		uint32_t const index_hash = fnv_hash(s_mix->words[0] ^ i, mix->words[i % MIX_WORDS]);
+		uint64_t tmp = num_full_pages * mulq_hi(index_hash, recip);
+		if (tmp > index_hash) tmp -= num_full_pages;
+		uint32_t index = (index_hash - tmp);
+#else
+		uint32_t index = fnv_hash(s_mix->words[0] ^ i, mix->words[i % MIX_WORDS]) % num_full_pages;
+#endif
 
 		for (unsigned n = 0; n != MIX_NODES; ++n) {
 			node const* dag_node;
@@ -217,15 +251,18 @@ static bool ethash_hash(
 
 #if defined(_M_X64) && ENABLE_SSE
 			{
-				__m128i fnv_prime = _mm_set1_epi32(FNV_PRIME);
+				__m128i dag0 = _mm_loadsi_128(&dag_node->xmm[0]);
+				__m128i dag1 = _mm_loadsi_128(&dag_node->xmm[1]);
+				__m128i dag2 = _mm_loadsi_128(&dag_node->xmm[2]);
+				__m128i dag3 = _mm_loadsi_128(&dag_node->xmm[3]);
 				__m128i xmm0 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[0]);
 				__m128i xmm1 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[1]);
 				__m128i xmm2 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[2]);
 				__m128i xmm3 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[3]);
-				mix[n].xmm[0] = _mm_xor_si128(xmm0, dag_node->xmm[0]);
-				mix[n].xmm[1] = _mm_xor_si128(xmm1, dag_node->xmm[1]);
-				mix[n].xmm[2] = _mm_xor_si128(xmm2, dag_node->xmm[2]);
-				mix[n].xmm[3] = _mm_xor_si128(xmm3, dag_node->xmm[3]);
+				mix[n].xmm[0] = _mm_xor_si128(xmm0, dag0);
+				mix[n].xmm[1] = _mm_xor_si128(xmm1, dag1);
+				mix[n].xmm[2] = _mm_xor_si128(xmm2, dag2);
+				mix[n].xmm[3] = _mm_xor_si128(xmm3, dag3);
 			}
 			#else
 			{
@@ -293,6 +330,19 @@ bool ethash_quick_check_difficulty(
 	return ethash_check_difficulty(&return_hash, boundary);
 }
 
+/* Map cache_size bytes of anonymous huge-page memory; returns NULL (not MAP_FAILED) on failure so callers can test with `!ptr`. */
+void *mmap_malloc(size_t cache_size) {
+	void *r = mmap(NULL, cache_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|MAP_POPULATE, -1, 0);
+	if (r == MAP_FAILED) {
+		fprintf(stderr, "Eek - could not allocate enough hugepages.\n");
+		return NULL;
+	}
+	return r;
+}
+/* Unmap a region obtained from mmap_malloc(); NULL or MAP_FAILED is a no-op, and a munmap failure is reported on stderr. */
+void mmap_free(void *ptr, size_t cache_size) {
+	if (ptr != NULL && ptr != MAP_FAILED && munmap(ptr, cache_size) != 0) { perror("munmap"); }
+}
 ethash_light_t ethash_light_new_internal(uint64_t cache_size, ethash_h256_t const* seed)
 {
 	struct ethash_light *ret;
@@ -300,7 +350,7 @@ ethash_light_t ethash_light_new_internal(uint64_t cache_size, ethash_h256_t cons
 	if (!ret) {
 		return NULL;
 	}
-	ret->cache = malloc((size_t)cache_size);
+	ret->cache = mmap_malloc((size_t)cache_size);
 	if (!ret->cache) {
 		goto fail_free_light;
 	}
@@ -312,7 +362,7 @@ ethash_light_t ethash_light_new_internal(uint64_t cache_size, ethash_h256_t cons
 	return ret;
 
 fail_free_cache_mem:
-	free(ret->cache);
+	mmap_free(ret->cache, cache_size);
 fail_free_light:
 	free(ret);
 	return NULL;
@@ -330,7 +380,7 @@ ethash_light_t ethash_light_new(uint64_t block_number)
 void ethash_light_delete(ethash_light_t light)
 {
 	if (light->cache) {
-		free(light->cache);
+		mmap_free(light->cache, light->cache_size);
 	}
 	free(light);
 }
@@ -369,11 +419,12 @@ static bool ethash_mmap(struct ethash_full* ret, FILE* f)
 	if ((fd = ethash_fileno(ret->file)) == -1) {
 		return false;
 	}
+	/* TODO: copy this mapping into a hugepage region. */
 	mmapped_data = mmap(
 		NULL,
 		(size_t)ret->file_size + ETHASH_DAG_MAGIC_NUM_SIZE,
 		PROT_READ | PROT_WRITE,
-		MAP_SHARED,
+		MAP_SHARED | MAP_POPULATE,
 		fd,
 		0
 	);
