/* $Id$ */

/*
 *
 *  CS213 - Lab assignment 3
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>

#include "memlib.h"
#include "malloc.h"

/* Build-time switches.

   SILLYCONSTS: when defined, mm_malloc divides the running average
   block size by 1.78 instead of 2 when deciding on which side of a
   split free block the used block is placed.  */
#define SILLYCONSTS

/* DEBUG2: dump the heap with print_mem() at the end of coal and split
   and at several points inside mm_malloc and mm_free, and print the
   heap increment whenever mm_malloc grows the heap.  Change the 0 to 1
   to enable. */
#if 0
#define DEBUG2
#endif

/* DEBUG3: print a one-line summary (block size used, size requested)
   at the end of every mm_malloc call. */
#if 0
#define DEBUG3
#endif

/* DEBUG4: dump the heap before, in the middle of and after coal.
   It also disables the print_mem assertion that a free block is always
   followed by a used block, since that does not hold mid-coalesce. */
#if 0
#define DEBUG4
#endif

/* DEBUG5: make split assert that the used and free sizes it is given
   add up to the size of the block being split. */
#if 0
#define DEBUG5
#endif

/* Team identification record.  The team_t type is not declared in this
   file (presumably it comes from malloc.h -- confirm); the fields are
   filled in positionally in the order given by the comments below. */
team_t team = {
    /* Team name to be displayed on webpage */
    "Dr. Gopher",
    /* First member full name */
    "Kevin Milans",
    /* First member email address */
    "kgm@andrew.cmu.edu",
    /* Second member full name (leave blank if none) */
    "",
    /* Second member email address (blank if none) */
    ""
};

/* Implementation Notes
   --------------------

   The format of a Used block is very simple:
   [size | 1][.... size - 8 bytes .....][size | 1]

   The format of a free block is also very simple:
   [size][prev][next][size - 16 bytes][size]

   In these examples, size is an integer containing the size
   in bytes of the entire block.  Size will always be a multiple
   of eight, but the minimum block size is 16.  (Thus, possible
   block sizes are 16, 24, 32, etc...).  The [prev] and [next]
   sections of a free block are pointers to the previous and next
   free blocks in the free block linked list.  The list does not
   contain any special ordering of the free blocks.

   My algorithm uses the best fit search method to look through
   all the blocks, which is surprisingly still fast enough for the
   speed requirement.  My intuition is that this would probably be
   too slow to do in libc, so I'd probably have to be more clever (i.e.
   use a more sophisticated approach like segregated lists)
   if the speed requirement were more difficult.

   There are two major tweaks I made to the very simple approach
   of the doubly linked list: the first is in deciding where to put
   the used block if we are splitting a free block into a used
   block and another free block -- my first approach was to always
   put the used block on the left, but there are many advantages to
   choosing the right upon occasion.  The second is the way I increase
   the heap size of the program proportionally to the current size of
   the heap.

   The first tweak simply follows these rules: if the block we are
   allocating is less than half of the average allocated block size
   so far, then put it on the right.  Otherwise, put it on the left.
   (In the special case where we are expanding the heap, we always put
   the used block on the left.)  My first clue that something like this
   might be a good idea was that when I tried randomly placing blocks
   to the left or right, my average utilization went up by a few %.

   The second tweak allows the increment to the heap to scale from
   512 to 32767, according to the size that the heap currently is.
   It never increases the heap by more than 1/16th its current size,
   so in the worst case (we just extend the heap and the test trials
   are done) the last expansion doesn't penalize my score by more than
   1/16 = 6.25%.  */

/* Block-manipulation macros.

   A block pointer `b' is a char * to the block's first (header) word.
   Every block starts and ends with a 4-byte size word whose bit 0 is
   the "used" flag.  Free blocks additionally keep their list links in
   the two words after the header (prev at b + 4, next at b + 8).
   Pointers are stored in int-sized words, so this layout assumes that
   an int and a pointer are both 4 bytes wide.

   All macro arguments are parenthesised, and the statement-like macros
   are wrapped in do { } while (0) so that `MACRO(...);' behaves as one
   statement, including directly under an unbraced if/else.  Arguments
   may be evaluated more than once: do not pass expressions with side
   effects. */

/* One past dseg_hi; the closing sentinel word lives at mseg_hi - 4. */
#define mseg_hi (dseg_hi + 1)
/* Clears bit 0 (the used flag) of a size word. */
#define SIZEMASK  0xFFFFFFFE

/* Payload size -> block size: add 8 bytes of header/footer and round
   up to a multiple of 8. */
#define ROUNDBLOCK(x) (((x) + 15) & ~7)
/* Round x up to a multiple of (pageval + 1); pageval must be a mask of
   the form 2^k - 1. */
#define ROUNDPAGE(x, pageval)  (((x) + (pageval)) & ~(pageval))

/* Bytes available for blocks: the heap minus the 16-byte header and
   the two 4-byte sentinels.  The constant is 23 rather than 24,
   presumably because mem_usage() is computed from dseg_hi and so is
   one less than the heap size -- confirm against memlib. */
#define MEMUSAGE  (mem_usage() - 23)

/* Either a 0 (block is free) or 1 (block is used) */
#define isUsed(b) (*(int *)(b) & 1)

/* Macros for getting the size of a block */
#define BlockSize(b) ((int)(*(int *)(b) & SIZEMASK))
#define UBlockSize(b) BlockSize(b)
#define FBlockSize(b) ((int)(*(int *)(b)))  /* When we know it's a free block */

/* Macros for setting the size of a block (header and footer word) */
#define SetUBlockSize(b, s) \
  do { *(int *)(b) = (s) | 1; *(int *)((b) + (s) - 4) = (s) | 1; } while (0)
#define SetFBlockSize(b, s) \
  do { *(int *)(b) = (s); *(int *)((b) + (s) - 4) = (s); } while (0)

/* Macros for moving forward and backward by one block.  BlockPrev reads
   the footer of the preceding block; the size is taken as a signed int
   (as in BlockSize) so that the negative size stored in the opening
   sentinel moves the pointer forward instead of relying on unsigned
   wrap-around of the pointer arithmetic. */
#define BlockNext(b) ((b) + BlockSize(b))
#define UBlockNext(b) ((b) + UBlockSize(b))
#define FBlockNext(b) ((b) + FBlockSize(b))
#define BlockPrev(b) ((b) - (int)(*(int *)((b) - 4) & SIZEMASK))
#define UBlockPrev(b) BlockPrev(b)
#define FBlockPrev(b) BlockPrev(b)

/* Macros for moving forward and backward through the free block list */
#define FBlockPrevF(b) ((char *)*((int *)((b) + 4)))
#define FBlockNextF(b) ((char *)*((int *)((b) + 8)))

/* Macros for setting the previous and next nodes of a free block.
   These store into the link word directly; assigning to the result of
   a cast is not valid C. */
#define SetFBlockPrevF(b, p) do { *(int *)((b) + 4) = (int)(p); } while (0)
#define SetFBlockNextF(b, n) do { *(int *)((b) + 8) = (int)(n); } while (0)

/* Misc Macros */
/* Write the opening (dseg_lo + 16) and closing (mseg_hi - 4) sentinel
   words.  Both hold -MEMUSAGE with the used bit set, so they look like
   used blocks of negative size. */
#define SETMAGICBOUNDS \
  do { *(int *)(dseg_lo + 16) = -MEMUSAGE | 1; \
       *(int *)(mseg_hi - 4) = -MEMUSAGE | 1; } while (0)

/* mm_init -- prepare a fresh heap for the allocator.

   Grows the heap by one page, fills in the four bookkeeping words at
   dseg_lo, writes the two sentinel words and turns the remaining space
   into a single free block that is its own list predecessor and
   successor.

   Returns 0 on success, or -1 when the heap could not be grown. */
int mm_init (void)
{
  int *hdr = (int *)dseg_lo;
  char *first = dseg_lo + 20;   /* 16-byte header + 4-byte sentinel */
  int avail;

  if (!mem_sbrk(mem_pagesize()))
    return -1;

  avail = MEMUSAGE;

  hdr[0] = 1;            /* number of free blocks */
  hdr[1] = (int)first;   /* the current free block */
  hdr[2] = 0;            /* running sum of all block sizes malloced */
  hdr[3] = 0;            /* number of calls to malloc */

  /* The sentinels look like used blocks, which lets the block macros
     walk off either end of the heap without special cases. */
  SETMAGICBOUNDS;

  /* One free block spanning everything, linked to itself. */
  SetFBlockSize(first, avail);
  SetFBlockPrevF(first, first);
  SetFBlockNextF(first, first);

  return 0;
}

/* print_mem -- debugging aid: dump and sanity-check the heap.

   First walks the circular free list forwards and then backwards for
   header[0] steps, asserting that every node is a free block, that the
   walk does not return to its start early, and that it ends exactly
   where it began.  Then walks every block in address order, starting
   at the first block (dseg_lo + 0x14, the address mm_init uses),
   asserting that each size is a multiple of 8 and at least 8 and that
   header and footer words agree, and printing '@' for used and '#' for
   free blocks.  The closing sentinel is printed as "~~~>" and wraps
   the walk back to the first block, which ends the loop.

   Unless DEBUG4 is defined, a free block must be followed by a used
   block (i.e. free blocks are always fully coalesced). */
void print_mem (void)
{
  int *header = (int *)dseg_lo;
  char *p = (char *)header[1];
  char *ptr = p;
  int i;

  printf("Free Blocks: %d\n", header[0]);
  if (header[0])
    {
      assert(!isUsed(p));
      printf("   current block: %p, size: %d\n", (void *)p, FBlockSize(p));
    }
  for (i = 0; i < header[0]; i++)
    {
      /* Check the node being visited, not just the list head. */
      assert(!isUsed(ptr));
      assert((!i) || (p != ptr));
      printf("   free block: %p, size: %d\n", (void *)ptr, FBlockSize(ptr));
      ptr = FBlockNextF(ptr);
    }
  assert(ptr == p);
  printf("   checking free blocks backward:");
  for (i = 0; i < header[0]; i++)
    {
      assert(!isUsed(ptr));
      assert((!i) || (p != ptr));
      ptr = FBlockPrevF(ptr);
    }
  assert(ptr == p);
  printf(" pass\n");
  printf("All Blocks (@ = used, # = free)\n");
  /* The first block follows the 16-byte header and the 4-byte opening
     sentinel. */
  p = ptr = dseg_lo + 0x14;
  do 
    {
      assert(!(BlockSize(ptr) % 0x8) && (BlockSize(ptr) > 0x7));
      assert(*(int *)ptr == *(int *)(ptr + BlockSize(ptr) - 4));
      if (isUsed(ptr))
        {
          printf("@");
        }
      else
        {
#if (! defined(DEBUG4))
          assert(isUsed(BlockNext(ptr)));
#endif
          printf("#");
        }
      printf("%d->", BlockSize(ptr));
      ptr = BlockNext(ptr);
      /* A used block with negative size is the closing sentinel;
         stepping over it lands back on the first block. */
      if ((BlockSize(ptr) < 0) && isUsed(ptr))
        {
          printf("~~~>");
          ptr = BlockNext(ptr);
        }
    } while (ptr != p);
  printf("\n");
}

/* coal -- coalesce the free block `b' with its free neighbours.

   `b' must already be marked free and linked into the free list.  If
   the block before it is free, `b' is unlinked and absorbed into that
   block; if the block after it is free, that block is unlinked and
   absorbed.  header[0] (free block count) is decremented for each
   merge, and header[1] (current free block) is redirected whenever it
   pointed at a block that was absorbed.

   Returns the start of the resulting free block, which is either `b'
   or the block that preceded it. */
inline char *coal (char *b)
{
  char *p = BlockPrev(b);
  char *n = BlockNext(b);
  int bsize = FBlockSize(b);
  int psize, nsize;
  char *bnextp, *bprevp;
  char *nnextp, *nprevp;
  int *header = (int *)dseg_lo;

#ifdef DEBUG4
  printf("Before coal--------\n");
  print_mem();
#endif

  if (!isUsed(p))
    {
      /* Merge into the previous block: p stays on the list, b leaves. */
      psize = BlockSize(p);
      header[0]--;
      bnextp = FBlockNextF(b);
      bprevp = FBlockPrevF(b);
      SetFBlockNextF(bprevp, bnextp);
      SetFBlockPrevF(bnextp, bprevp);
      SetFBlockSize(p, bsize + psize);
      if ((char *)header[1] == b)
        {
          header[1] = (int)p;
        }
      b = p;
      bsize = FBlockSize(b);
    }

#ifdef DEBUG4
  printf("Middle coal----------\n");
  print_mem();
#endif

  if (!isUsed(n))
    {
      /* Absorb the following block: n leaves the list, b grows. */
      nsize = BlockSize(n);
      header[0]--;
      nnextp = FBlockNextF(n);
      nprevp = FBlockPrevF(n);
      SetFBlockNextF(nprevp, nnextp);
      SetFBlockPrevF(nnextp, nprevp);
      SetFBlockSize(b, bsize + nsize);
      if ((char *)header[1] == n)
        {
          header[1] = (int)b;
        }
    }

#if ((defined(DEBUG2)) || (defined(DEBUG4)))
  printf("End coal--------\n");
  print_mem();
#endif
  return b;
}

/* split -- divide the free block `b' into a used block of `usedsize'
   bytes and a free block of `freesize' bytes.

   With `goright' non-zero the used block takes the high-address end of
   `b' and the free remainder keeps b's address; otherwise the used
   block starts at `b' and the remainder follows it.  The remainder
   takes over b's position in the free list and becomes the current
   free block (header[1]); the free block count is unchanged.

   Returns a pointer to the start (header word) of the used block. */
inline char *split (char *b, int usedsize, int freesize, int goright)
{
  int *hdr = (int *)dseg_lo;
  char *used_blk = goright ? b + freesize : b;
  char *free_blk = goright ? b : b + usedsize;
  char *pred, *succ;

#ifdef DEBUG5
  assert(FBlockSize(b) == usedsize + freesize);
#endif

  SetUBlockSize(used_blk, usedsize);
  SetFBlockSize(free_blk, freesize);
  hdr[1] = (int)free_blk;

  if (hdr[0] != 1)
    {
      /* Other free blocks exist: splice the remainder in where b was.
         b's link words are still intact at this point. */
      pred = FBlockPrevF(b);
      succ = FBlockNextF(b);
      SetFBlockNextF(free_blk, succ);
      SetFBlockPrevF(free_blk, pred);
      SetFBlockNextF(pred, free_blk);
      SetFBlockPrevF(succ, free_blk);
    }
  else
    {
      /* b was the only free block, so the remainder links to itself. */
      SetFBlockNextF(free_blk, free_blk);
      SetFBlockPrevF(free_blk, free_blk);
    }
#ifdef DEBUG2
  print_mem();
#endif
  return used_blk;
}

/* place_block -- carve `realsize' bytes out of the free block `bp'.

   `bpsize' is the size of `bp'.  When the block fits exactly, or with
   only 8 bytes to spare (too small to form a block of its own), the
   whole block is handed out: it is unlinked from the free list, the
   free count in header[0] drops by one and header[1] moves on to the
   next free block (or becomes 0 when none is left).  When more room is
   left over the block is split, with the used part on the right if
   `goright' is non-zero.

   Returns the payload pointer (4 bytes past the block start), or NULL
   if `bp' is smaller than `realsize'. */
static void *place_block (int *header, char *bp, int bpsize, int realsize,
                          int goright)
{
  /* prev and next are only meaningful when they name free blocks other
     than bp; with a single free block they point back at bp itself. */
  char *prev = FBlockPrevF(bp);
  char *next = FBlockNextF(bp);
  int slack = bpsize - realsize;

  if ((slack == 0) || (slack == 8))
    {
      header[0]--;
      if (header[0])
        {
          header[1] = (int)next;
          SetFBlockNextF(prev, next);
          SetFBlockPrevF(next, prev);
        }
      else
        {
          header[1] = 0;
        }

      SetUBlockSize(bp, bpsize);
      return (void *)(bp + 4);
    }

  if (slack > 0)
    {
      return (void *)(4 + split(bp, realsize, slack, goright));
    }

  return NULL;
}

/* mm_malloc -- allocate at least `size' bytes.

   Does a best-fit search over the whole free list.  If a block fits it
   is used directly or split; on a split the used part goes to the
   right-hand end when the request is smaller than a fraction of the
   average block size requested so far.  If nothing fits, the heap is
   grown by an amount that scales with its current size, the new space
   is coalesced with a trailing free block, and the request is placed
   at the left of the result.

   A request of 0 bytes is given a minimum-size (16-byte) block, since
   an 8-byte block could not hold the free-list links once freed.

   Returns a pointer to the payload, or NULL if the request is too
   large to represent or the heap cannot be grown. */
void *mm_malloc (size_t size)
{
  int *header = (int *)dseg_lo;
  char *p = (char *)header[1];
  char *newfblock;
  char *prev;
  char *next;
  void *toret = NULL;
  int x;
  int realsize = (int)ROUNDBLOCK(size);
  char *bp = NULL;
  int bpsize = 0;
  int increment;
  int average;
  int pagevar;

  /* Reject sizes whose rounded block size wrapped or does not fit in
     the int-sized size words. */
  if ((realsize < 0) || ((size_t)realsize < size))
    return NULL;

  /* Never create a block too small to carry prev/next links. */
  if (realsize < 16)
    realsize = 16;

  header[3]++;
  header[2]+=realsize;

#ifdef SILLYCONSTS
  /* I have no idea why 1.78 seems to work the best.  But it does.
     The optimal value for this guy may depend upon the size of the
     block we're currently allocating, and it may not.  I'm not sure. */
  average = (header[2] / header[3]) / 1.78;
#else
  average = (header[2] / header[3]) / 2;
#endif

#ifdef DEBUG2
  print_mem();
#endif

  /* Find a good candidate block to use */
  /* Apparently Fragmentation is low enough that I can actually use
     the best fit method and stay as fast as required.  So I will.  */
  for (x = 0; x < header[0]; x++)
    {
      if ((FBlockSize(p) >= realsize) && (!bp || (FBlockSize(p) < bpsize)))
        {
          bp = p;
          bpsize = FBlockSize(bp);
        }
      p = FBlockNextF(p);
    }

  if (bp)
    {
      /* Put the used block on the right if the size of the used
         block is less than about 1/2 of the average */
      toret = place_block(header, bp, bpsize, realsize, (realsize < average));
    }

  if (!toret)
    {
      /* increase heap space here.  Heap space grows proportionally to
         the existing heap space size.  */
      newfblock = mseg_hi - 4;   /* the old closing sentinel's slot */
      prev = BlockPrev(newfblock);
      if (isUsed(prev))
        x = 0;
      else
        x = FBlockSize(prev);    /* trailing free space we can reuse */

      /* Build a 2^k - 1 mask between 0xFF and 0x7FFF from 1/16th of
         the current heap usage. */
      pagevar = (((mem_usage() >> 4) & 0x7FFF) | 0xFF);
      pagevar = pagevar | (pagevar >> 1);
      pagevar = pagevar | (pagevar >> 2);
      pagevar = pagevar | (pagevar >> 4);

      increment = ROUNDPAGE(realsize - x, pagevar);
      if (!mem_sbrk(increment))
        return NULL;

      SETMAGICBOUNDS;

      /* Link the new space into the free list just before the current
         free block, or make it the only element. */
      if (header[0])
        {
          next = (char *)header[1];
          prev = FBlockPrevF(next);
          SetFBlockNextF(prev, newfblock);
          SetFBlockPrevF(next, newfblock);
        }
      else
        {
          header[1] = (int)newfblock;
          next = prev = newfblock;
        }

      header[0]++;
      SetFBlockSize(newfblock, increment);
      SetFBlockNextF(newfblock, next);
      SetFBlockPrevF(newfblock, prev);

      /* Use bp as the place for the block.  Here we basically
         emulate another call to mm_malloc(), but without having to
         search for the best place pointer `bp' again. */
      bp = coal(newfblock);
      bpsize = FBlockSize(bp);
#ifdef DEBUG2
      print_mem();
#endif
      /* When expanding the heap the used block always goes on the
         left. */
      toret = place_block(header, bp, bpsize, realsize, 0);

#ifdef DEBUG2
      printf("%%%%increment: %d%%%%%p\n", increment, (void *)newfblock);
#endif
    }

#ifdef DEBUG2
  print_mem();
#endif
#ifdef DEBUG3
  printf("malloc summary: used block size %d for %lu\n", bpsize,
         (unsigned long)size);
#endif

  return toret;
}

/* mm_free -- return the block whose payload starts at `ptr' to the
   free list.

   `ptr' must be a pointer previously returned by mm_malloc and not yet
   freed; passing NULL is a no-op.  The block is marked free and linked
   in just before the current free block, then coalesced with any free
   neighbours.  If there were no free blocks at all, it becomes the
   sole, self-linked element of the list and the current free block. */
void mm_free (void *ptr)
{
  char *p;
  int bsize;
  int *header = (int *)dseg_lo;
  char *next, *prev;

  if (ptr == NULL)
    return;

  /* The block header sits 4 bytes before the payload. */
  p = (char *)ptr - 4;
  bsize = UBlockSize(p);

#ifdef DEBUG2  
  print_mem();
#endif

  SetFBlockSize(p, bsize);
  if (header[0])
    {
      next = (char *)header[1];
      prev = FBlockPrevF(next);
      SetFBlockNextF(p, next);
      SetFBlockPrevF(p, prev);
      SetFBlockNextF(prev, p);
      SetFBlockPrevF(next, p);
      header[0]++;
      coal(p);
    }
  else
    {
      /* No other free block exists, so there is nothing to coalesce
         with. */
      SetFBlockNextF(p, p);
      SetFBlockPrevF(p, p);
      header[1] = (int)p;
      header[0]++;
    }
#ifdef DEBUG2
  print_mem();
#endif
}
