(*

	FoxNet: The Fox Project's Communication Protocol Implementation Effort
	Edo Biagioni (esb@cs.cmu.edu)
	Fox Project
	School of Computer Science
	Carnegie Mellon University
	Pittsburgh, Pa 15139-3891

	i.	Abstract

	tcpstate.fun: the implementation for the TCP finite state machine.
	Page numbers in the comments refer to page numbers in RFC 793,
	the definition of TCP [793]. Other commonly-referred to documents are:
	[1122] RFC 1122, Requirements for Internet Hosts (1989)
	[VJ88] Van Jacobson, Congestion Avoidance and Control,
	       ACM SIGCOMM-88, August 1988, pp 314-329
	[KP87] Karn and Partridge, Improving Round-Trip Time Estimates
	       in Reliable Transport Protocols, ACM SIGCOMM-87,
	       August 1987, pp 2-7


	ii.	Table of Contents

	i.	Abstract
	ii.	Table of Contents
	1.	functor Tcp_Send
	2.	internal error function no_connection
	3.	internal error function connection_closing
	4.	internal function make_segment
	5.	internal function split_segment
	6.	internal function next_buffer
	7.	internal function restart_timer
	8.	internal function can_send
	9.	internal function get_data_size
	10.	internal function send_tcb
	11.	function send
	12.	internal function send_packet
	13.	function send_state


	1.	functor Tcp_Send
*)

functor Tcp_Send (structure Tcp_Tcb: TCP_TCB
		  structure Outgoing: EXTERNAL
		  structure Retransmit: TCP_RETRANSMIT
		  structure B: FOX_BASIS
		  sharing type Tcp_Tcb.tcp_tcb = Retransmit.Tcb.tcp_tcb
		      and type Tcp_Tcb.tcp_out = Retransmit.Tcb.tcp_out
		      and type Tcp_Tcb.outgoing_data = Outgoing.T
		  val debug_level: int ref option): TCP_SEND =
 struct
  type tcp_state = Tcp_Tcb.tcp_state
  type tcp_out = Tcp_Tcb.tcp_out
  type send_packet = Outgoing.T
  type segment = Tcp_Tcb.tcp_out

  datatype send_state = Valid_Send | Opening_Send | Closing_Send

  val zero32 = Word32.fromInt 0
  val one32 = Word32.fromInt 1
  val two32 = Word32.fromInt 2
  val hundred32 = Word32.fromInt 100

  local

   structure Trace = Trace (structure V = B.V
			    val debug_level = debug_level
			    val module_name = "tcpsend.fun"
			    val makestring = fn _ => NONE)

(*
	2.	internal error function no_connection

	[793] p. 41
*)

   fun no_connection (name, tcb as (Tcp_Tcb.Tcb {to_do, ...})) =
        let val action = Tcp_Tcb.User_Error (name ^ ": connection is closed")
        in to_do := Tcp_Tcb.Q.add (! to_do, action);
	   tcb
        end

(*
	3.	internal error function connection_closing

	[793] p. 41

	The send function in tcpmain.fun should detect when we are
	sending in an illegal state and raise a corresponding exception,
	so this function should never be called.  If it is, something
	is seriously wrong, so we close the connection, stop the window
	timer, and delete any window probes from the to_do list.  This
	will keep this error from recurring.
*)

   fun to_do_string (action, "") = Tcp_Tcb.action_string action
     | to_do_string (action, rest) =
        Tcp_Tcb.action_string action ^ ", " ^ rest

   fun connection_closing_actions (name, to_do,
				   timers: Tcp_Tcb.tcp_timer_set) =
        let val error = Tcp_Tcb.User_Error (name ^ ": connection closing")
	    val close = Tcp_Tcb.Delete_Tcb
	    fun nullify_action Tcp_Tcb.Probe_Window =
	         Tcp_Tcb.User_Error "cleared window probe"
	      | nullify_action action = action
        in to_do := Tcp_Tcb.Q.add (! to_do, error);
           to_do := Tcp_Tcb.Q.add (! to_do, close);
	   to_do := Tcp_Tcb.Q.map nullify_action (! to_do);
	   (#stop_window timers) ()
        end

   fun connection_closing (name, tcb as (Tcp_Tcb.Tcb {to_do, timers, ...})) =
        (connection_closing_actions (name, to_do, timers);
	 Trace.local_print ("connection closing, to_do list is " ^
			    Tcp_Tcb.Q.fold to_do_string (! to_do) "" ^
			    ", tcb is " ^
			    Tcp_Tcb.tcb_string tcb);
	 tcb)

(*
	4.	internal function make_segment
*)

   fun make_segment (tcb, data, length, false) =
        let val Tcp_Tcb.Tcb {snd_nxt, rcv_wnd, rcv_nxt,
			     rcv_wnd_scale, ts, ...} = tcb
	    val wnd = Word32.>> (! rcv_wnd, ! rcv_wnd_scale)
	in case ! ts of
	      NONE =>
	       Tcp_Tcb.Fast_Out {seq = ! snd_nxt, ack = ! rcv_nxt,
				 len = length, wnd = wnd, data = data}
	    | SOME {recent, last_ack_sent} =>
	       (last_ack_sent := ! rcv_nxt;
		Tcp_Tcb.Timestamp_Out {seq = ! snd_nxt, ack = ! rcv_nxt,
				       len = length, wnd = wnd, data = data,
				       times =
				       {send_time = Tcp_Tcb.current_time (),
					echo = ! recent}})
        end
     | make_segment (tcb, data, length, true) =	(* urgent *)
        let val Tcp_Tcb.Tcb {snd_nxt, rcv_wnd, rcv_nxt,
			     rcv_wnd_scale, ts, ...} = tcb
	    val wnd = Word32.>> (! rcv_wnd, ! rcv_wnd_scale)
	    val up = Word32.- (length, one32)
	    val options = case ! ts of
	                     NONE => []
	                   | SOME {recent, last_ack_sent} =>
			      (last_ack_sent := ! rcv_nxt;
			       [Tcp_Tcb.Timestamp
			         {send_time = Tcp_Tcb.current_time (),
				  echo = ! recent}])
	    val segment = Tcp_Tcb.Seg {seq = ! snd_nxt, ack = ! rcv_nxt,
				       len = length, wnd = wnd,
				       up = up, options = options,
				       syn_flag = false, fin_flag = false,
				       reset_flag = false, ack_flag = true,
				       push_flag = true, urgent_flag = true}
	in Tcp_Tcb.Out_Seg {seg = segment, data = data}
        end

(*
	5.	internal function split_segment

	Split the segment into chunks of the specified size
	and place all but the first chunk back onto the queue.
	Order is important.
*)

   fun split_segment (data, split, tcb, queue, urg) =
        let val word_split = Word.fromInt (Word32.toInt split)
 	    val rest = (Word32.fromInt o Word.toInt)
		       (Outgoing.size data - word_split)
 	    val (packet1, packet2) = Outgoing.split (data, word_split)
	in (make_segment (tcb, packet1, split, urg), split, NONE,
	    Tcp_Tcb.D.add_to_front (queue, (packet2, rest, urg, NONE)))
        end

(*
	6.	internal function next_buffer

	This function returns the first buffer, if
	- there is only one buffer (or no buffer), or
	- the sum of the lengths of the first two buffers exceeds the limit
	Otherwise, this function combines the first two buffers
	and tries again.

	This combining function is used to send fewer packets across
	the net and fewer TCP headers.  It can be very useful when
	sending large numbers of small packets, but it can also be
	slow since it copies the data twice.
*)

   local
    fun get_next (first as (first_packet, first_length, true, _), queue, _) =
         (queue, first)			(* urgent, send by itself. *)
      | get_next (first as (first_packet, first_length, _, _), queue, limit) =
         if Word32.>= (first_length, limit) then (queue, first)
	 else
	  case Tcp_Tcb.D.first queue of
	     NONE => (queue, first)
	   | SOME (new_queue, (_, _, true, _)) =>
	      (queue, first)		(* urgent data, don't merge*)
	   | SOME (new_queue,
		   second as (second_packet, second_length, _, _)) =>
	      if Word32.>= (Word32.+ (first_length, second_length),
			       limit) then
	       (queue, first)
	      else		(* combine the first two buffers *)
	       let val packet = Outgoing.join (first_packet, second_packet)
 		   val length = Word32.fromInt (Word.toInt
						(Outgoing.size packet))
	       in get_next ((packet, length, false, NONE),
			    new_queue, Word32.- (limit, length))
	       end

   in (* local *)
    fun next_buffer (queue, limit) =
         case Tcp_Tcb.D.first queue of
	    NONE => NONE
	  | SOME (new_queue, first) =>
	     SOME (get_next (first, new_queue, limit))
   end (* local *)

(*
	7.	internal function restart_timer
*)

   fun restart_timer (tcb as (Tcp_Tcb.Tcb {to_do, srto, timers, ...})) =
        ((#start_window timers) (! srto);
	 tcb)

(*
	8.	internal function can_send

	This function tries to implement part of the sender
	Silly-Window-Syndrome avoidance algorithm (RFC 1122, p. 98,
	4.2.3.4). A segment can be sent only if one of the following holds:
	- min (window, data queued) >= mss
	- unacked = 0 and all queued data fits in the window
	- unacked = 0 and 1/2 max_snd_wnd <= min (data queued, window)
	- the window timer has expired
	The last case is recognized by max_snd_wnd being zero.

	Note if unacked = 0, then can_send fails exactly when
	data size > window, window < mss, max_wnd <> 0, and
        max_wnd > window * 2.
 *)

  local
   open Word32	(* all arithmetic here is 32-bit unsigned *)

   val toString = Word32.fmt StringCvt.DEC
  in

   fun can_send (window, unacked:Word32.word, max_wnd, mss, data_size) =
        if data_size <= window then
	 data_size >= mss orelse unacked = zero32 orelse max_wnd = zero32
	else
	 window >= mss orelse
	 (unacked = zero32 andalso max_wnd <= window * two32) orelse
	 max_wnd = zero32

   fun can_string (window, unacked, max_wnd, mss, data_size) =
        "can_send (window " ^ toString window ^
        ", unacked " ^ toString unacked ^
        ", max_wnd " ^ toString max_wnd ^
        ", mss " ^ toString mss ^
        ", data_size " ^ toString data_size ^ ") = " ^
	Bool.toString (can_send (window, unacked, max_wnd, mss, data_size))

  end (* local *)

(*
	9.	internal function get_data_size
*)

   fun get_data_size (NONE, _) = zero32
     | get_data_size (SOME (data_queue, (packet, size, _, _)), max) =
        let val new_size = Word32.fromInt (Word.toInt (Outgoing.size packet))
	in if Word32.>= (new_size, max) then new_size
	   else Word32.+ (new_size,
			  get_data_size (Tcp_Tcb.D.first data_queue,
					 Word32.- (max, new_size)))
	end

(*
	10.	internal function send_tcb
*)

   fun send_tcb (tcb, in_loop) =
        let val Tcp_Tcb.Tcb {snd_nxt, snd_wnd, snd_una, max_snd_wnd,
			     send_immediately, unacked_segs,
			     mss, srtt, srto, cwnd, ssthresh,
			     queued, to_do, timers, rcv_sws, ...} = tcb
	    val debug_snd_nxt = ! snd_nxt
	    val unacked = Word32.- (! snd_nxt, ! snd_una)
	    val send_window = ! snd_wnd
	    val congestion_window = Word32.fromInt (! cwnd)
	    val total_window = Word32.min (send_window, congestion_window)
	    val window_size = Word32.- (total_window,
					   Word32.min
					   (total_window, unacked))
	    val b4mss = Word32.fromInt (Word16.toInt (! mss))
             (* when checking the data size, the largest useful number
                is the larger of the MSS and the window size. *)
	    val data_queue = ! queued
	    val data_size = get_data_size (Tcp_Tcb.D.first data_queue,
					   Word32.max (b4mss, window_size))
	    val send_limit = Word32.min (window_size, b4mss)
	     (* When next_buffer sees two packets whose total
	        length is less than the limit, it combines them.  We
		only want to combine very small packets, so we use
		a limit of at most 100. *)
	    val combine_limit = Word32.min (send_limit, hundred32)
	     (* for can_send, use an "effective" unacked size of zero,
	        since sending the previous segments always sets unacked
		to non-zero and if the data fits in the window,
		we want to send this data as part of the loop. *)
	    val effective_unacked = if in_loop then zero32 else unacked
             (* if there is a zero send window and there are still
                unacked segments, we do nothing. Otherwise, we try to send. *)
	in if window_size <> zero32 orelse unacked = zero32 then
	     (* send immediately if the flag is on or we can send an
                MSS-sized packet or it's time to probe the window or
                other extenuating circumstances (the max send window
                is small, or all queued data fits in the window.)
                These conditions are captured by can_send.  *)
	    if (! send_immediately) orelse	(* Nagle algorithm *)
	       can_send (window_size, effective_unacked, ! max_snd_wnd, b4mss,
			 data_size) then
	     case next_buffer (data_queue, combine_limit) of
	        NONE =>	(* no data to send, no timers to start. *)
		 tcb
	      | SOME (dequeued, (first_buffer, buffer_size, urg, send_fun)) =>
		 let val (segment, segment_size, segment_send, final_queued) =
		      if Word32.<= (buffer_size, send_limit) then
		       (* send the whole buffer.  Since send_packet
			  enqueues packets in mss-sized buffers,
			  this should be the common case. *)
		       (make_segment (tcb, first_buffer, buffer_size, urg),
			buffer_size, send_fun, dequeued)
		      else
		      (* probe the window: send as much as fits in
		         the window but no less than one byte. *)
		       let val split = Word32.max (one32, send_limit)
		       in split_segment (first_buffer, split, tcb,
					 dequeued, urg)
		       end
		     val send_action = Tcp_Tcb.Send_Segment (segment,
							     segment_send)
		     val send_list = Tcp_Tcb.Q.add (! to_do, send_action)
		     val new_snd_nxt = Word32.+ (! snd_nxt, segment_size)
		 in snd_nxt := new_snd_nxt;
		    queued := final_queued;
		    to_do := send_list;
		    unacked_segs := 0;
		    Retransmit.retransmit (tcb, segment, ! srto);
		    if Word32.< (b4mss, window_size) orelse
		       Word32.< (segment_size, window_size) then
		     send_tcb (tcb, true)
		    else
		     (Trace.debug_constant_string "window full, send complete";
		      tcb)
	         end (* let *)
	    else (* cannot send now. *)
	     (* if the inability to send is NOT due to our having
	        outstanding unacked segments, then we better start
		the timer so we will try again to send later -- it
		may possibly be due to the receiver reducing its
		buffer space/window (RFC 1122, p. 99).  *)
	     (Trace.debug_print (fn _ =>
				 can_string (window_size, effective_unacked,
					     ! max_snd_wnd, b4mss, data_size));
	      if unacked = zero32 then
	       (Trace.debug_constant_string "restarting timer";
	        restart_timer tcb)
	      else
	       tcb)
	   else
	    (* window_size = 0 andalso unacked <> 0; set timer and wait. *)
	    (Trace.debug_print (fn _ =>
				"send window exhausted, window " ^
				Word32.fmt StringCvt.DEC window_size ^
				", first unacked " ^
				Word32.fmt StringCvt.DEC (! snd_una) ^
				", next " ^
				Word32.fmt StringCvt.DEC (! snd_nxt) ^
				", send window " ^
				Word32.fmt StringCvt.DEC (! snd_wnd) ^
				", sws " ^
				 Word32.fmt StringCvt.DEC (! rcv_sws) ^
				", cwnd " ^ Int.toString (! cwnd));
	     restart_timer tcb;
	     tcb)	(* zero window, wait for the next call. *)
	end (* let *)
        
(*
	11.	function send
*)

  in (* local *)
   fun send (Tcp_Tcb.Estab tcb) = Tcp_Tcb.Estab (send_tcb (tcb, false))
     | send (Tcp_Tcb.Close_Wait tcb) =
        Tcp_Tcb.Close_Wait (send_tcb (tcb, false))
     | send (Tcp_Tcb.Syn_Active tcb) =
        Tcp_Tcb.Syn_Active (send_tcb (tcb, false))
     | send (Tcp_Tcb.Syn_Passive (tcb, max_size)) =
        Tcp_Tcb.Syn_Passive (send_tcb (tcb, false), max_size)
	(* if we're in Fin_Wait_1 and fin_sent (the boolean) is false,
	   we are waiting to send the fin until all pending packets
	   are sent, so we should definitely allow sends.
	   If the fin has been sent, on the other hand, we disallow sends. *)
     | send (Tcp_Tcb.Fin_Wait_1 (tcb, false)) =
        Tcp_Tcb.Fin_Wait_1 (send_tcb (tcb, false), false)
     | send (Tcp_Tcb.Fin_Wait_1 (tcb, true)) =
        Tcp_Tcb.Fin_Wait_1 (connection_closing ("send(fw1/true)", tcb), true)
     | send (Tcp_Tcb.Closed (to_do, timers)) =
        (Tcp_Tcb.Q.add (! to_do,
			Tcp_Tcb.User_Error "send connection closed");
	 Tcp_Tcb.Closed (to_do, timers))
     | send (Tcp_Tcb.Listen (tcb, max_size)) =
        Tcp_Tcb.Listen (no_connection ("send(listen)", tcb), max_size)
     | send (Tcp_Tcb.Syn_Sent (tcb, max_size)) =
        Tcp_Tcb.Syn_Sent (no_connection ("send(syn-sent)", tcb), max_size)
     | send (Tcp_Tcb.Fin_Wait_2 tcb) =
        Tcp_Tcb.Fin_Wait_2 (connection_closing ("send(fw2)", tcb))
     | send (Tcp_Tcb.Closing tcb) =
        Tcp_Tcb.Closing (connection_closing ("send(fw2+)", tcb))
     | send (Tcp_Tcb.Last_Ack tcb) =
        Tcp_Tcb.Last_Ack (connection_closing ("send(la)", tcb))
     | send (state as (Tcp_Tcb.Time_Wait {to_do, timers, ...})) =
        (connection_closing_actions ("send(tw)", to_do, timers);
	 state)

(*
	12.	internal function send_packet

	[793] pp. 19-21

	Since we know nothing about the dynamic conditions when the
	packet will actually be sent, we allocate MSS-size chunks.
	The send procedure can then split them up or merge them as
	required.  Note that add_header_and_send is expected to fill
	in the TCP header.
*)

   exception Send_Packet of string

   fun send_packet (state, packet, urgent, add_header_and_send) =
        let val size = Outgoing.size packet
            val tcb = case state of
	                Tcp_Tcb.Estab tcb => tcb
		      | Tcp_Tcb.Close_Wait tcb => tcb
		      | Tcp_Tcb.Syn_Active tcb => tcb
		      | Tcp_Tcb.Syn_Passive (tcb, _) => tcb
		      | _ =>
			 let val state = Tcp_Tcb.state_string state
			     val string = "unexpected state " ^ state ^
			                  " in send_packet"
			 in Trace.print_raise (Send_Packet string, SOME string)
			 end
	    val Tcp_Tcb.Tcb {mss, queued, ...} = tcb
	    val max_segment = Word.fromInt (Word16.toInt (! mss))
	    fun queue_single (pkt, len) =
	         let val entry = (pkt, Word32.fromInt (Word.toInt len), urgent,
				  SOME add_header_and_send)
	         in queued := Tcp_Tcb.D.add_to_back (! queued, entry)
	         end
            fun send_it (pkt, len, state) =
	         (queue_single (pkt, len);
	          send state)
            fun fragment_send (packet, state) =
	         let val len = Outgoing.size packet
	         in if max_segment >= len then
		     send_it (packet, len, state)
	            else
		     let val (first, rest) = Outgoing.split (packet,
							     max_segment)
		     in queue_single (first, max_segment);
		        fragment_send (rest, state)
		     end
		 end
          in if size <= 0w0 then state
	     else
(* all paths from here should end in a call to send,
   generally by way of send_it. *)
	      if size <= max_segment then
	       send_it (packet, size, state)
	      else
	       fragment_send (packet, state)
          end (* let *)

   end (* local *)
(*
	13.	function send_state
*)

  fun send_state (Tcp_Tcb.Estab _) = Valid_Send
    | send_state (Tcp_Tcb.Close_Wait _) = Valid_Send
    | send_state (Tcp_Tcb.Syn_Active _) = Valid_Send
    | send_state (Tcp_Tcb.Syn_Passive _) = Valid_Send
    | send_state (Tcp_Tcb.Listen _) = Opening_Send
    | send_state (Tcp_Tcb.Syn_Sent _) = Opening_Send
    | send_state (Tcp_Tcb.Closed _) = Closing_Send
    | send_state (Tcp_Tcb.Fin_Wait_1 _) = Closing_Send
    | send_state (Tcp_Tcb.Fin_Wait_2 _) = Closing_Send
    | send_state (Tcp_Tcb.Closing _) = Closing_Send
    | send_state (Tcp_Tcb.Last_Ack _) = Closing_Send
    | send_state (Tcp_Tcb.Time_Wait _) = Closing_Send

 end (* struct *)
