#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <strings.h>
#include <unistd.h>
#include <iostream.h>
#include <stdio.h>
#include <netdb.h>
#include <signal.h>

#include <arpa/inet.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/resource.h>  /* SUN rlimit */
#include <netinet/in.h>	/* sockaddr_in{} and other Internet defns */

#include <Util/inetMisc.h>  /* MAX_PAYLOAD_SIZE defined */

#include "../Gossip/gossipAgent.h"
#include "dataMgrTcp.h"

/* Build the TCP data manager on top of the UDP one.
 *
 * DaemonPort  passed to the DataMgrUdp base; the TCP listening socket is
 *             bound to DaemonPort + 1
 * TcpWndSize  socket buffer size applied to every data connection; must
 *             lie in 1..65535
 */
DataMgrTcp::DataMgrTcp(int DaemonPort, int TcpWndSize)
  : DataMgrUdp(DaemonPort) {

  if (verbosity > 0) {
    printf("\ndata mgr is TCP");
  }

  /* remember the window size and sanity check it */
  this->TcpWndSize = TcpWndSize;
  assert(TcpWndSize > 0 && TcpWndSize <= 65535);

  /* start with no data connections and an empty delayed-connect table */
  ConnHead = NULL;
  DelayTable = new Hashtable();

  /* create the listening TCP socket with address reuse enabled */
  tcpFD = Socket(AF_INET, SOCK_STREAM, 0);
  SetsockoptReuseAddrPort(tcpFD);
  // SetsockNonBlocking(tcpFD);

  /* accept connections on any local interface, port DaemonPort + 1 */
  struct sockaddr_in listenAddr;
  memset(&listenAddr, 0, sizeof(listenAddr));
  listenAddr.sin_family = AF_INET;
  listenAddr.sin_addr.s_addr = htonl(INADDR_ANY);
  listenAddr.sin_port = htons(DaemonPort+1);
  Bind(tcpFD, (struct sockaddr *)&listenAddr, sizeof(listenAddr));

  /* backlog of 30 pending connections */
  Listen(tcpFD, 30);

  /* ignore SIGPIPE; a broken connection is reported through the EPIPE
   * error of the write instead */
  signal(SIGPIPE, SIG_IGN);
}


/* Tear down every remaining data connection, release the delayed-connect
 * table and close the listening socket opened by the constructor.
 */
DataMgrTcp::~DataMgrTcp() {
  /* DelConn() unlinks the head each time, so this drains the list */
  while (ConnHead != NULL) {
    DelConn(ConnHead);
  }
  delete DelayTable;

  /* the constructor opened tcpFD; it was previously left open when the
   * manager was destroyed */
  close(tcpFD);
}


/* Mark every descriptor this manager wants watched in the read set rs
 * and the write set ws:
 *   - whatever the UDP layer registers (its return value is not used),
 *   - the TCP listening socket, for reading,
 *   - every receiving connection, for reading,
 *   - every sending connection that is still connecting or has data
 *     waiting (queued packets or a partially sent packet), for writing.
 *
 * Always returns MAX_SETFD_TIME.
 */
int DataMgrTcp::SetFD(fd_set *rs, fd_set *ws) {
  DataMgrUdp::SetFD(rs, ws);

  FD_SET(tcpFD, rs);

  struct TcpConn *cur = ConnHead;
  while (cur != NULL) {
    if (cur->type == TCP_SND) {
      /* evaluated in the same order as a short-circuit || chain */
      int wantWrite = (cur->dir.snd.status == TCP_CONNECTING);
      if (! wantWrite) wantWrite = (! cur->dir.snd.pktBuf->IsEmpty());
      if (! wantWrite) wantWrite = (cur->dir.snd.incSize > 0);

      if (wantWrite) {
        FD_SET(cur->fd, ws);
      }
    } else if (cur->type == TCP_RCV) {
      FD_SET(cur->fd, rs);
    } else {
      assert(! "wrong type");
    }
    cur = cur->next;
  }

  return MAX_SETFD_TIME;
}


int DataMgrTcp::ReadFromNetwork(fd_set *rs, fd_set *ws, char *buf, 
			     int maxBufLen, int *fromAddr, int *fromPort) {

  int ret = DataMgrUdp::ReadFromNetwork(rs, ws, buf, maxBufLen, 
					fromAddr, fromPort);
  if (ret >= 0) return ret;
  
  struct sockaddr_in sock;
  socklen_t socklen = sizeof(sock);


  /* handle new data connection request */
  if (FD_ISSET(tcpFD, rs)) {
    FD_CLR(tcpFD, rs);
    
    int new_fd = Accept(tcpFD, (struct sockaddr *)&(sock), &socklen);
    int new_addr = htonl(sock.sin_addr.s_addr);
    
    SetsockNonBlocking(new_fd);
    SetsockoptReuseAddrPort(new_fd);
    SetsockoptBufSize(new_fd, this->TcpWndSize);

    /* add the new fd */
    struct TcpConn *conn = GetConnByAddr(new_addr, TCP_RCV);
    if (conn != NULL) {
      fprintf(stderr, "\nDataMgr: conn to %s was previously established", 
              GetNameByAddr(conn->addr));
      DelConn(conn);
    }
    AddConn(new_fd, new_addr, TCP_RCV, TCP_CONNECTED);
  }
  

  /* snd requests */
  for (struct TcpConn *conn = ConnHead; conn != NULL;) {
    switch(conn->type) {
    case TCP_SND: {
      /* connection is establishing */
      if (conn->dir.snd.status == TCP_CONNECTING) {
	
	/* no status yet */
	if (! FD_ISSET(conn->fd, ws)) {
	  conn = conn->next;
	  continue;
	}
	
	/* check connect error status */
	int error;
	socklen_t n = sizeof(error);

	/* prevent core dump at maraca: (was using Getsockopt())
	 * getsockopt: Connection refused at maraca 
	 */
	if (getsockopt(conn->fd, SOL_SOCKET, SO_ERROR, &error, &n) < 0) {
	  MyWarning("getsockopt failed %s: ", GetNameByAddr(conn->addr));
	  perror("getsockopt");
	  
	  error = -1;
	}
	
	if (error != 0) {
	  if(verbosity > 1){
	    printf("\nnonblocking connect failed");
	  }
	  DelConn(conn);
	  conn = ConnHead;  /* not optimal, but works */
	  continue;
	}
	
	/* connection successful */
	conn->dir.snd.status = TCP_CONNECTED;
	UpdateAccess(conn);
      }
      
      
      /* handling outstanding write request */
      if (FD_ISSET(conn->fd, ws)) {
	FD_CLR(conn->fd, ws);
	SendData(conn);
      }
      conn = conn->next;
      break;
    }

    case TCP_RCV: {
      if (conn->dir.rcv.parseFlag) {
	/* parse the data packet, find the boundary end */
	assert((conn->dir.rcv.size > 0) && (conn->dir.rcv.size < 2*MAX_PAYLOAD_SIZE));
	
	int pos=0;
	
	if(conn->dir.rcv.size >= (int)sizeof(GossipMsgHdr)){
	  GossipMsgHdr *hdrPtr= (GossipMsgHdr *)(conn->dir.rcv.buf);
	  pos=ntohl(hdrPtr->size);
	}  
	
	/* incomplete data */
	if (
	    (conn->dir.rcv.size < (int)sizeof(GossipMsgHdr)) ||
	    (
	     (conn->dir.rcv.size >= (int)sizeof(GossipMsgHdr)) &&
	     (pos > conn->dir.rcv.size)
	    )
	    ){
	  assert(conn->dir.rcv.size <= MAX_PAYLOAD_SIZE);
	  conn->dir.rcv.parseFlag = FALSE;
	  conn = conn->next;
	  continue;
	}
      
	/* copy the data over */
	bcopy(conn->dir.rcv.buf, buf, pos);
	
	/* pos < conn->dir.rcv.size meaning extra data left */
	if (pos < conn->dir.rcv.size) {
	  conn->dir.rcv.size -= pos;
	  bcopy(conn->dir.rcv.buf+pos, conn->dir.rcv.buf, conn->dir.rcv.size);
	  conn->dir.rcv.parseFlag = TRUE;
	}
	else{
	  /* exact match: (pos == conn->dir.rcv.size) */
	  assert(pos == conn->dir.rcv.size);
	  
	  conn->dir.rcv.size = 0;
	  conn->dir.rcv.parseFlag = FALSE;
	}
	*fromAddr = conn->addr;
	*fromPort = DaemonPort;  /* bogus now XXX */
	return pos; 
      }
      
      if (! FD_ISSET(conn->fd, rs)) {
	conn = conn->next;
	continue;
      }
      
      FD_CLR(conn->fd, rs);
      int bufLen = recv(conn->fd, buf, MAX_PAYLOAD_SIZE, 0);
      
      if (bufLen < 0) {
	perror("recv");
	DelConn(conn);
	conn = ConnHead;  /* not optimal, but works */
	continue;
      }
      
      if (bufLen == 0) {
	DelConn(conn);
	conn = ConnHead;  /* not optimal, but works */
	continue;
      }
      
      UpdateAccess(conn);
      assert((conn->dir.rcv.size <= MAX_PAYLOAD_SIZE) && 
	     (conn->dir.rcv.size >= 0));
      assert((bufLen <= MAX_PAYLOAD_SIZE) && (bufLen > 0));
      bcopy(buf, (conn->dir.rcv.buf)+(conn->dir.rcv.size), bufLen);
      conn->dir.rcv.size += bufLen;
      conn->dir.rcv.parseFlag = TRUE;
      
      break;
    }

    default:
      assert(! "\nunknown type");
    }
  }

  return -1;    /* no data */
}


void DataMgrTcp::DelConn(struct TcpConn *connIn) {
  assert(connIn != NULL);

  struct TcpConn *parent = NULL;

  for (struct TcpConn *conn = ConnHead; conn != NULL; conn = conn->next) {
    if (conn == connIn) {
      close(conn->fd);
      if(verbosity > 1){
	printf("\nconnection to %s is closed", GetNameByAddr(conn->addr));
      }

      /* relink the link list */
      if (ConnHead == conn) ConnHead = conn->next;
      else parent->next = conn->next;
	
      free(conn);
      return;
    }
    parent = conn;
  }
  assert(! "\nDelConn: fd is not found");
}


/* Garbage collect idle connections: delete every connection whose last
 * access is more than GC_DATACONN_TIMER before the current time.
 */
void DataMgrTcp::GCConn() {
  long currTime = GetCurrTime();
  struct TcpConn *conn = ConnHead; 

  while (conn != NULL) {
    /* DelConn() frees conn, so remember the successor first; this keeps
     * the sweep a single pass instead of restarting from the head */
    struct TcpConn *next = conn->next;

    if (currTime - conn->lastAccess > GC_DATACONN_TIMER) {
      if(verbosity > 1){
        printf("\nGC data connection to %s", GetNameByAddr(conn->addr));
      }
      DelConn(conn);
    }
    conn = next;
  }
}


/* Look up the connection of the given type (TCP_SND or TCP_RCV) for
 * addr.  Returns the first match on the list, or NULL if there is none.
 */
struct TcpConn *DataMgrTcp::GetConnByAddr(int addr, TcpConnType type) {
  struct TcpConn *cur = ConnHead;

  while (cur != NULL) {
    if ((cur->type == type) && (cur->addr == addr)) {
      break;
    }
    cur = cur->next;
  }
  return cur;  /* NULL when the walk ran off the end */
}


/* Look up the connection of the given type that uses descriptor fd.
 * Returns the first match on the list, or NULL if there is none.
 */
struct TcpConn *DataMgrTcp::GetConnByFD(int fd, TcpConnType type) {
  struct TcpConn *cur = ConnHead;

  while (cur != NULL) {
    if ((cur->type == type) && (cur->fd == fd)) {
      break;
    }
    cur = cur->next;
  }
  return cur;  /* NULL when the walk ran off the end */
}


/* Create a connection record for descriptor fd and peer addr and push it
 * onto the head of the connection list.  No connection of the same type
 * to addr may exist yet (asserted).
 *
 * type    TCP_SND or TCP_RCV
 * status  initial status; only stored for TCP_SND connections
 *
 * Returns the new record.  If the record cannot be allocated, fd is
 * closed and NULL is returned.
 */
struct TcpConn *DataMgrTcp::AddConn(int fd, int addr, TcpConnType type, 
                                    ConnStatus status) {
  assert(GetConnByAddr(addr, type) == NULL);
  
  struct TcpConn *conn = (struct TcpConn *)malloc(sizeof(struct TcpConn));
  if (conn == NULL) {
    /* nobody else keeps track of fd, so release it here */
    perror("malloc");
    close(fd);
    return NULL;
  }

  conn->addr = addr;
  conn->fd = fd;
  UpdateAccess(conn);
  conn->next = ConnHead;
  ConnHead = conn;
  conn->type = type;

  switch(type) {
  case TCP_SND:
    /* sending side: empty packet queue, nothing partially sent */
    conn->dir.snd.status = status;
    conn->dir.snd.pktBuf = new PacketBuffer(MAX_DATAMGR_QLEN);
    conn->dir.snd.incSize = 0;
    break;
  case TCP_RCV:
    /* receiving side: empty reassembly buffer, nothing to parse */
    conn->dir.rcv.size = 0;
    conn->dir.rcv.parseFlag = FALSE;
    break;
  default:
    assert(! "\nunknown type");
  }

  GCConn();  /* perform GC on outdated connections */

  return conn;
}

/* Open a non-blocking TCP connection to addr (host byte order) on port
 * DaemonPort + 1.
 *
 * Returns the new sending connection, which is either already connected
 * or still connecting (non-blocking connect()).  Returns NULL if the
 * connect failed immediately (perhaps the remote host died) or if addr
 * is still inside the retry delay that follows such a failure.
 */
struct TcpConn *DataMgrTcp::EstablishConn(int addr) {
  
  long cur_time = GetCurrTime();

  /* The delay table stores the deadline in the pointer value itself.
   * The casts go through long so they stay well-formed where a pointer
   * is wider than int.
   * XXX this only works if deadline is 32 bits */
  void *key = (void *)(long)addr;
  int deadline = (int)(long)(DelayTable->Get(key));

  /* delay connection expired */
  if ((deadline !=0) && (cur_time - deadline >= 0)) {
    DelayTable->Remove(key);
    printf("\ntimer expired %ld %d", cur_time, deadline);
    deadline = 0;
  }
  
  /* delayed connection */
  if (deadline != 0) {
    return NULL;
  }
  
  int new_fd = Socket(AF_INET, SOCK_STREAM, 0);
  
  SetsockNonBlocking(new_fd);
  SetsockoptReuseAddrPort(new_fd);
  SetsockoptBufSize(new_fd, this->TcpWndSize);

  /* zero the whole structure first; only three fields are set below and
   * the rest must not reach connect() uninitialized */
  struct sockaddr_in sin;
  bzero((char *)&sin, sizeof(sin));
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl(addr);
  sin.sin_port = htons(DaemonPort+1);

  errno=0;
  if (connect(new_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
    if (errno != EINPROGRESS) {
      perror("connect");
      close(new_fd);

      /* add a delayed connection (2 seconds) */
      deadline = ((int)cur_time) + 2000;
      DelayTable->Put(key, (void *)(long)deadline);

      return NULL;
    }
    /* connect is in progress; completion is detected later */
    return AddConn(new_fd, addr, TCP_SND, TCP_CONNECTING);
  }

  /* connected immediately */
  return AddConn(new_fd, addr, TCP_SND, TCP_CONNECTED);
}

  
/* Count the connections that remain after idle ones have been garbage
 * collected.
 */
int DataMgrTcp::Size() {
  /* drop idle connections first so they are not counted */
  this->GCConn();

  int total = 0;
  struct TcpConn *cur = ConnHead;
  while (cur != NULL) {
    ++total;
    cur = cur->next;
  }
  return total;
}

/* Stamp conn with the current time.  GCConn() compares this stamp with
 * GC_DATACONN_TIMER to find idle connections.  conn must not be NULL.
 */
void DataMgrTcp::UpdateAccess(struct TcpConn *conn) {
  assert(conn != NULL);
  conn->lastAccess = GetCurrTime();
}

/* Send one packet to toAddr.
 *
 * CONTROL, PROBE and MEASUREMENT packets are handed to the UDP layer.
 * DATAPACKET and POKE packets travel over the TCP sending connection to
 * toAddr, which is established on demand.  Any other packet type is
 * reported through MyError().
 *
 * return # of bytes sent if succeed, < 0 if failed
 */
int DataMgrTcp::SendToNetwork(int toAddr, int toPort, const char *buf, 
                              int bufLen, PacketType packetType, 
                              int priority) {

  /* these packet types never use TCP */
  if ((packetType == CONTROL) || (packetType == PROBE) || 
      (packetType == MEASUREMENT)) {
    return DataMgrUdp::SendToNetwork(toAddr, toPort, buf, bufLen, 
                                     packetType, priority);
  }

  if ((packetType != DATAPACKET) && (packetType != POKE)) {
    MyError("DataMgrTcp: unknown case");
  }

  /* check arguments */
  assert(bufLen <= MAX_PAYLOAD_SIZE);

  /* reuse the sending connection, or establish one if it does not exist */
  struct TcpConn *conn = GetConnByAddr(toAddr, TCP_SND);
  if (conn == NULL) {
    conn = EstablishConn(toAddr);
    if (conn == NULL) {
      return -1;
    }
  }

  struct TcpConnSnd *snd = &(conn->dir.snd);

  /* XXX the "drop the packet if pktBuf is full" check that used to sit
   * here was commented out to implement tail drop (4/02 -- yhchu) */

  /* nothing queued and nothing partially sent: send right away */
  const int idle = snd->pktBuf->IsEmpty() && !(snd->incSize > 0);
  if (idle) {
    return SendBuffer(conn, buf, bufLen);
  }

  /* otherwise queue behind the pending data, then pump the queue */
  snd->pktBuf->Enqueue(buf, bufLen, priority);
  return SendData(conn);
}


/* Send buf of size bufLen on conn's TCP socket (non-blocking).
 *
 * Whatever could not be sent is kept in conn's incBuf with its length in
 * incSize, so a later call can continue with it.  On entry incSize must
 * be 0 or equal to bufLen (asserted); the latter is the case when the
 * caller passes incBuf itself.
 *
 * Returns
 *   >= 0  number of bytes accepted by send(); a remainder, if any, has
 *         been saved in incBuf
 *   -1    the connection is broken; conn has been deleted and must not
 *         be used any more
 *   -2    the send would block; the whole buffer has been saved in incBuf
 *   -3    the connection is still being established; the whole buffer
 *         has been saved in incBuf
 */
int DataMgrTcp::SendBuffer(struct TcpConn *conn, const char *buf, 
                           int bufLen) {

  assert(conn->dir.snd.incSize == 0 || conn->dir.snd.incSize == bufLen);
  
  if (conn->dir.snd.status == TCP_CONNECTING) {
    conn->dir.snd.incSize = bufLen;
    bcopy(buf, conn->dir.snd.incBuf, conn->dir.snd.incSize);
    return -3;
  } 

  /* non-blocking send; an interruption by a signal is not an error, so
   * simply try again instead of falling into the fatal default case */
  int bytesSent;
  do {
    errno = 0;
    bytesSent = send(conn->fd, buf, bufLen, 0);
  } while ((bytesSent < 0) && (errno == EINTR));

  if (bytesSent >= 0) {
    /* only a fraction of the data is sent, buffer the rest */
    if (bytesSent < bufLen) {
      conn->dir.snd.incSize = bufLen - bytesSent;
      bcopy(buf+bytesSent, conn->dir.snd.incBuf, conn->dir.snd.incSize);
    } else {
      conn->dir.snd.incSize = 0;
    }
    
    UpdateAccess(conn);
    return bytesSent;
  }

  switch(errno) {
  case EWOULDBLOCK:
#if EAGAIN != EWOULDBLOCK
  case EAGAIN:       /* same condition where the two values differ */
#endif
    if(verbosity > 1){
      printf("\npacket of len %d not sent due to non-blocking send", bufLen);
    }

    conn->dir.snd.incSize = bufLen;
    bcopy(buf, conn->dir.snd.incBuf, conn->dir.snd.incSize);

    return -2;
    break;
  case EMSGSIZE:
    /* just want to capture this errno, nothing bad */
    /*****
          Sanjay: is it a small bug here? 12/27/2000
          If you just break, you would enter the assert(0) after switch.
    **********/
    assert(! "EMSGSIZE: dummy assert");
    break;
  case EPIPE:
  case ECONNREFUSED:
  case ENOTCONN:
  case ECONNRESET:   /* Connection reset by peer */
    /*******
        Sanjay: I added ENOTCONN here on 12/27/2000
        I found some machines would exit with "Transport end point not connected"
        This usually happened to nodes which were a parent of sahir
    ********/
    DelConn(conn);
    return -1;
    break;
  default:
    perror("send");
    MyError("dataMgrTcp: send errno = %d", errno);
    //DelConn(conn);
    //return -1;
    assert(0);
  }
  
  /* should never reach here */
  assert(0);
  return 0;
}

/* Pump data into the TCP socket for as long as it accepts it: first the
 * partially sent packet kept in incBuf (if any), then the packets queued
 * in pktBuf, one after the other.
 *
 * Returns the sum of all bytes sent if at least one byte went out.
 * If nothing was sent:
 *   -1  there was no data to send, or the connection was dropped
 *   -2  the data is blocked
 *   -3  the connection is still being established
 *
 * SendBuffer() deletes conn when the connection is broken, so the caller
 * must not rely on conn still being valid after this call.
 */
int DataMgrTcp::SendData(struct TcpConn *conn) {
  int bytesSent = 0;
  char *buf; int bufLen;

  for (;;) {
    if (conn->dir.snd.incSize > 0) {
      /* finish the partially sent packet first */
      bufLen = conn->dir.snd.incSize;
      buf = conn->dir.snd.incBuf;
    } else {
      if (conn->dir.snd.pktBuf->IsEmpty()) {
        /* Everything has been flushed.  Report what was sent; -1 is
         * only for the case where there was nothing to send at all
         * (it used to be returned even after a successful flush). */
        return (bytesSent > 0) ? bytesSent : -1;
      }
      conn->dir.snd.pktBuf->Dequeue(&buf, &bufLen);
    } 

    int ret = SendBuffer(conn, buf, bufLen);

    if (ret < 0) {
      /* conn may already be gone here; do not touch it */
      return (bytesSent == 0) ? ret : bytesSent;
    }
    bytesSent += ret;
  }
}
