/* Uses implicit lists.  One very effective optimization, however, is that
 * instead of using a footer, the backward coalescing is done inline
 * during the heap traversal performed when allocating new memory.  This
 * both saves memory and increases speed (as the nodes are already being
 * traversed).
 * This implementation is good for this lab since the utilization is 
 * heavily weighted.  Although traversing the implicit list is slow in 
 * general, the overhead is only a single 8-byte header for each block of
 * space used.  The only case where this loses memory efficiency is in 
 * situations where interlaced frees lead to fragmentation; this is the
 * trade-off made, which for the particular performance metric used, seems
 * to be very effective.
 */

/* Commentary:  
 *   This is a somewhat naive memory management implementation for a 
 *   "real world" malloc/free; however for our particular assignment
 *   I think it is the best choice based on the particular performance
 *   metric we were given.  
 */

/*
 *  Papadimitriou Spiros
 *  spapadim+@cs.cmu.edu
 *
 *  CS213 - Lab assignment 3
 *
 */

/*
 * David Meltzer (davem@cmu.edu)
 * James Hollifield (jamesh@andrew)
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>

#include "memlib.h"
#include "malloc.h"

/* Team identification record.  team_t is not declared in this file
 * (presumably it comes from malloc.h -- confirm there); the comments
 * below label each initializer in the order it is given.
 */
team_t team = {
    /* Team name to be displayed on webpage */
    "boom",
    /* First member full name */
    "David Jay Meltzer",
    /* First member email address */
    "davem",
    /* Second member full name (leave blank if none) */
    "James Hollifield",
    /* Second member email address (blank if none) */
    "jamesh"
};

/* Rounds an address up to the next 8-byte boundary and yields it as a
 * void*.  An address whose low 3 bits are already clear comes back
 * unchanged.  The argument is evaluated exactly once and is fully
 * parenthesized, so pointer expressions such as `p + n` are safe to pass.
 * Assumes unsigned long is wide enough to hold a pointer.
 */
#define ALIGN_POINTER(x) \
    ((void *) (((unsigned long)(x) + 7UL) & ~7UL))

/* Header layout: bit 63 is the allocated flag, bits 0..62 hold the block
 * length.  Both masks are unsigned so they combine cleanly with the
 * unsigned long header words.
 */
#define ALLOCATE_MASK (0x8000000000000000UL)
#define LENGTH_MASK   (0x7fffffffffffffffUL)

/* Masks out the highest bit (the allocated bit) from a header word,
 * leaving only the block length.
 */
#define BLOCK_LENGTH(x) ((x) & LENGTH_MASK)

/* 1 if block is currently allocated, 0 if free */
#define IS_ALLOCATED(x) (((x) >> 63) & 1)

/*
 * mm_init - allocator set-up hook.
 *
 * This allocator keeps no state of its own that needs preparing
 * (mm_malloc re-reads dseg_lo and dseg_hi on every call), so there is
 * nothing to do here.  Always returns 0.
 */
int mm_init (void) {
    const int status_ok = 0;

    return status_ok;
}


void *mm_malloc (size_t size)
{
    unsigned long *curheap, *frag = NULL;
    unsigned long extraspace;
    unsigned long newsize;
    unsigned long blen;

    /* Allocations rounded up to nearest 8 bytes */
    if(size & 7)
      size += 8 - (size & 7);
    /* size is now in number of 8 byte blocks */
    size /= 8;

    /* Header is 1 long, highest bit is allocate bit, rest are number of
     * 8 byte spaces (including header) in this block.
     */

    /* Traverse through heap looking for a free space big enough to hold */
    curheap = (unsigned long *) ALIGN_POINTER(dseg_lo);
    while(curheap < (unsigned long *)dseg_hi) {
       blen = BLOCK_LENGTH(*curheap);
       if(IS_ALLOCATED(*curheap) || blen < (size+1)) {
          /* handles the backward coalescing */
          if(frag && !IS_ALLOCATED(*curheap) && !IS_ALLOCATED(*frag)) {
             *frag += *curheap;
             curheap = frag;
             frag = NULL;
             continue;
          }
          frag = curheap;  /* might need this if last block is free */
          curheap += blen;
          continue;
       }

       /* we found somewhere to put it */
       *curheap |= ALLOCATE_MASK;  /* mark as allocated */
       /* if there is room left over for header and at least 1 space,
          fragment this to utilize the left over space */
       if(blen > size + 2) {
          extraspace = blen - size - 1;
          blen = size+1;  /* new length of block */
          *curheap = (blen) | ALLOCATE_MASK;
          frag = curheap + blen;
          *frag = extraspace;
       }      
       return curheap+1;  /* just past header */
     }
 
     /* If we get here it means there we didn't find room for it in our
      * current heap.  So add it to the end. 
      */

      /* Originally we allocated space in whole pages, but this led to
       * severe artificial memory utilization problems in some cases 
       * due to the performance metric counting the input to mem_sbrks
       * and not the actual pages returned from it.  So, only call
       * mem_sbrk with the memory actually needed.  
       */
     if(frag && !IS_ALLOCATED(*frag)) {
        /* if last block was free, can use that as start of new block */
        newsize = (size*8 - (BLOCK_LENGTH(*frag)-1)*8);
        curheap = frag;
        if(mem_sbrk(newsize) == NULL) 
          return NULL;
     }
     else
     {
        newsize = (size+1)*8;
	if(mem_sbrk(newsize) == NULL)
          return NULL;
     }

     
     /* set sizes and make the remainder of page (if any) free */
       *curheap = (size+1) | ALLOCATE_MASK;

     return curheap+1;
}

/*
 * mm_free - release a block previously returned by mm_malloc.
 *
 * Clears the allocated flag in the block's header and, if the block that
 * immediately follows it in the heap is also free, absorbs that block
 * (forward coalescing).  Merging with a free block that lies *before*
 * this one is not done here; mm_malloc performs it while scanning.
 *
 * A NULL ptr is ignored, matching the behaviour of the standard free().
 * Previously a NULL argument caused a write through an invalid address.
 */
void mm_free (void *ptr)
{
    unsigned long *curblock, *nextblock;

    if(ptr == NULL) {
        return;
    }

    /* The header is the word directly in front of the payload */
    curblock = (unsigned long *)ptr - 1;
    *curblock &= LENGTH_MASK;   /* remove allocated bit */

    /* See if the block after this one lies inside the heap and is free;
     * if so, coalesce by adding its length to ours.
     */
    nextblock = curblock + BLOCK_LENGTH(*curblock);
    if((unsigned long *)dseg_hi > nextblock && !IS_ALLOCATED(*nextblock)) {
        *curblock = BLOCK_LENGTH(*curblock) + BLOCK_LENGTH(*nextblock);
    }
}

