(* format.sml
 *
 * COPYRIGHT (c) 1992 by AT&T Bell Laboratories.  See COPYRIGHT file for details.
 *
 * AUTHOR:  John Reppy
 *	    AT&T Bell Laboratories
 *	    Murray Hill, NJ 07974
 *	    jhr@research.att.com
 *
 * TODO
 *   - field widths in scan
 *   - add PREC of (int * fmt_item) constructor to allow dynamic control of
 *     precision.
 *   - precision in %d, %s, ...
 *   - * flag in scan (checks, but doesn't scan input)
 *   - %n specifier in scan
 *)

structure Format : FORMAT =
  struct

    structure SS = Substring
    structure SC = StringCvt

    datatype fmt_item
      = INT of LargestInt.int
      | WORD of LargestWord.word
      | BOOL of bool
      | CHR of char
      | STR of string
      | REAL of LargestFloat.real
      | LEFT of (int * fmt_item)	(* left justify in field of given width *)
      | RIGHT of (int * fmt_item)	(* right justify in field of given width *)

    exception BadFormat
    exception BadFmtList

    fun padLeft (str, pad) = SC.padLeft #" " pad str
    fun padRight (str, pad) = SC.padRight #" " pad str
    fun zeroLPad (str, pad) = SC.padLeft #"0" pad str
    fun zeroRPad (str, pad) = SC.padRight #"0" pad str

  (* int to string conversions (for positive integers only) *)
    local
      val (maxInt8, maxInt10, maxInt16) = (case LargestInt.maxInt
	     of (SOME n) => let
		  val maxP1 = LargestWord.fromLargeInt n + 0w1
		  in
		    ( LargestWord.fmt SC.OCT maxP1,
		      LargestWord.fmt SC.DEC maxP1,
		      LargestWord.fmt SC.HEX maxP1
		    )
		  end
	      | NONE => ("", "", "")
	    (* end case *))
    in
    datatype posint = PosInt of int | MaxInt
    fun intToOctal MaxInt = maxInt8
      | intToOctal (PosInt i) = LargestInt.fmt SC.OCT i
    fun intToStr MaxInt = maxInt10
      | intToStr (PosInt i) = LargestInt.toString i
    fun intToHex MaxInt = maxInt16
      | intToHex (PosInt i) = LargestInt.fmt SC.HEX i
    fun intToHeX i =
	  String.implode (
	    CharVector.foldr (fn (c, l) => Char.toUpper c :: l) [] (intToHex i))
    end (* local *)

  (* string to int conversions *)
    val octToInt = LargestInt.scan SC.OCT
    val decToInt = LargestInt.scan SC.DEC
    val hexToInt = LargestInt.scan SC.HEX

  (* precompiled format specifiers *)
    datatype sign
      = DfltSign	(* default: put a sign on negative numbers *)
      | AlwaysSign	(* "+"      always has sign (+ or -) *)
      | BlankSign	(* " "      put a blank in the sign field for positive numbers *)
    datatype neg_sign
      = MinusSign	(* default: use "-" for negative numbers *)
      | TildeSign	(* "~"      use "~" for negative numbers *)
    type field_flags = {
	sign : sign,
	neg_char : neg_sign,
	zero_pad : bool,
	base : bool,
	ljust : bool
      }

    datatype field_wid = NoPad | Wid of int

    datatype real_format
      = F_Format		(* "%f" *)
      | E_Format of bool	(* "%e" or "%E" *)
      | G_Format of bool	(* "%g" or "%G" *)

    datatype field_type
      = OctalField
      | IntField
      | HexField
      | CapHexField
      | CharField
      | BoolField
      | StrField
      | RealField of {prec : int, format : real_format}

    datatype fmt_spec
      = Raw of substring
      | CharSet of char -> bool
      | Field of (field_flags * field_wid * field_type)

  (* character sets *)
    abstype charset = CS of Word8Array.array
    with
      fun mkCharSet () = CS(Word8Array.array(Char.maxOrd+1, 0w0))
      fun addChar (CS ba, c) = Word8Array.update(ba, Char.ord c, 0w1)
      fun addRange (CS ba, c1, c2) = let
	    val ord_c2 = Char.ord c2
	    fun add i = if (i <= ord_c2)
		  then (Word8Array.update(ba, i, 0w1); add(i+1))
		  else ()
	    in
	      if (c1 <= c2) then (add(Char.ord c1)) else raise BadFormat
	    end
      fun inSet (CS ba) arg = (Word8Array.sub(ba, Char.ord arg) = 0w1)
      fun notInSet (CS ba) arg = (Word8Array.sub(ba, Char.ord arg) = 0w0)
    end

  (* scan a field specification.  Assume that the previous character in the
   * base string was "%" and that the first character in the substring fmtStr
   * is not "%".
   *)
    fun scanFieldSpec fmtStr = let
	  val (fmtStr, flags) = let
		fun doFlags (ss, flags) = (case (SS.getc ss, flags)
		       of (SOME(#" ", ss'), {sign=AlwaysSign, ...}) =>
			    raise BadFormat
			| (SOME(#" ", ss'), {neg_char, zero_pad, base, ljust, ...}) =>
			    doFlags (ss', {
				sign = BlankSign, neg_char = neg_char,
				zero_pad = zero_pad, base = base, ljust = ljust
			      })
			| (SOME(#"+", ss'), {sign=BlankSign, ...}) =>
			    raise BadFormat
			| (SOME(#"+", ss'), {neg_char, zero_pad, base, ljust, ...}) =>
			    doFlags (ss', {
				sign = AlwaysSign, neg_char = neg_char,
				zero_pad = zero_pad, base = base, ljust = ljust
			      })
			| (SOME(#"~", ss'), {sign, zero_pad, base, ljust, ...}) =>
			    doFlags (ss', {
				sign = sign, neg_char = TildeSign,
				zero_pad = zero_pad, base = base, ljust = ljust
			      })
			| (SOME(#"-", ss'), {sign, neg_char, zero_pad, base, ...}) => 
			    doFlags (ss', {
				sign = sign, neg_char = neg_char,
				zero_pad = zero_pad, base = base, ljust = true
			      })
			| (SOME(#"#", ss'), {sign, neg_char, zero_pad, ljust, ...}) =>
			    doFlags (ss', {
				sign = sign, neg_char = neg_char,
				zero_pad = zero_pad, base = true, ljust = ljust
			      })
			| (SOME(#"0", ss'), {sign, neg_char, base, ljust, ...}) =>
			    (ss', {
				sign = sign, neg_char = neg_char,
				zero_pad = true, base = base, ljust = ljust
			      })
			| _ => (fmtStr, flags)
		      (* end case *))
		in
		  doFlags (fmtStr, {
		      sign = DfltSign, neg_char = MinusSign,
		      zero_pad = false, base = false, ljust = false
		    })
		end
	  val (wid, fmtStr) = if (Char.isDigit(valOf(SS.first fmtStr)))
		then let
		  val (n, fmtStr) = valOf (decToInt SS.getc fmtStr)
		  in (Wid n, fmtStr) end
		else (NoPad, fmtStr)
	  val (ty, fmtStr) = (case SS.getc fmtStr
		 of (SOME(#"d", ss)) => (IntField, ss)
		  | (SOME(#"X", ss)) => (CapHexField, ss)
		  | (SOME(#"x", ss)) => (HexField, ss)
		  | (SOME(#"o", ss)) => (OctalField, ss)
		  | (SOME(#"c", ss)) => (CharField, ss)
		  | (SOME(#"s", ss)) => (StrField, ss)
		  | (SOME(#"b", ss)) => (BoolField, ss)
		  | (SOME(#".", ss)) => let
(* NOTE: "." ought to be allowed for d,X,x,o and s formats as it is in ANSI C *)
		      val (n, ss) = valOf(decToInt SS.getc ss)
		      val (format, ss) = (case SS.getc ss
			     of (SOME(#"E" , ss))=> (E_Format true, ss)
			      | (SOME(#"e" , ss))=> (E_Format false, ss)
			      | (SOME(#"f" , ss))=> (F_Format, ss)
			      | (SOME(#"G" , ss))=> (G_Format true, ss)
			      | (SOME(#"g", ss)) => (G_Format false, ss)
			      | _ => raise BadFormat
			    (* end case *))
		      in
			(RealField{prec = n, format = format}, ss)
		      end
		  | (SOME(#"E", ss)) => (RealField{prec=6, format=E_Format true}, ss)
		  | (SOME(#"e", ss)) => (RealField{prec=6, format=E_Format false}, ss)
		  | (SOME(#"f", ss)) => (RealField{prec=6, format=F_Format}, ss)
		  | (SOME(#"G", ss)) => (RealField{prec=6, format=G_Format true}, ss)
		  | (SOME(#"g", ss)) => (RealField{prec=6, format=G_Format false}, ss)
		  | _ => raise BadFormat
		(* end case *))
	  in
	    (Field(flags, wid, ty), fmtStr)
	  end (* scanFieldSpec *)

    fun scanField fmtStr = (case SS.getc fmtStr
	   of (SOME(#"%", fmtStr')) => (Raw(SS.slice(fmtStr, 0, 1)), fmtStr')
	    | _ => scanFieldSpec fmtStr
	  (* end case *))

    fun scanCharSet fmtStr = let
	  val cset = mkCharSet()
	  val (isNot, fmtStr) = (case SS.getc fmtStr
		 of (SOME(#"^", ss)) => (true, ss)
		  | _ => (false, fmtStr)
		(* end case *))
	  fun scan (nextChar, ss) = (case (SS.getc ss)
		 of (SOME(#"-", ss)) => (case (SS.getc ss)
		       of (SOME(#"]", ss)) => (
			    addChar(cset, nextChar);
			    addChar(cset, #"-");
			    ss)
			| (SOME(c, ss)) => (
			    addRange(cset, nextChar, c);
			    scanNext ss)
			| NONE => raise BadFormat
		      (* end case *))
		  | (SOME(#"]", ss)) => (addChar(cset, nextChar); ss)
		  | (SOME(c, ss)) => (addChar(cset, nextChar); scan(c, ss))
		  | NONE => raise BadFormat
		(* end case *))
	  and scanNext ss = (case (SS.getc ss)
		 of (SOME(#"-", ss)) => raise BadFormat
		  | (SOME(#"]", ss)) => ss
		  | (SOME(c, ss)) => scan(c, ss)
		  | NONE => raise BadFormat
		(* end case *))
	  and scanChar (SOME arg) = scan arg
	    | scanChar NONE = raise BadFormat
	  val fmtStr = scanChar (SS.getc fmtStr)
	  in
	    if isNot
	      then (CharSet(notInSet cset), fmtStr)
	      else (CharSet(inSet cset), fmtStr)
	  end

    fun compileFormat str = let
	  val split = SS.splitl (fn #"%" => false | _ => true)
	  fun scan (ss, l) =
		if (SS.isEmpty ss)
		  then rev l
		  else let val (ss1, ss2) = split ss
		    in
		      case (SS.getc ss2)
		       of (SOME(#"%", ss')) => let val (field, ss3) = scanField ss'
			    in
			      scan(ss3, field::(Raw ss1)::l)
			    end
			| _ => rev((Raw ss1)::l)
		      (* end case *)
		    end
	  in
	    scan (Substring.all str, [])
	  end

    fun compileScanFormat str = let
	  val split = SS.splitl (Char.notContains "\n\t %[")
	  fun scan (ss, l) =
		if (SS.isEmpty ss)
		  then rev l
		  else let val (ss1, ss2) = split ss
		    in
		      case (SS.getc ss2)
		       of (SOME(#"%", ss')) => let val (field, ss3) = scanField ss'
			    in
			      scan(ss3, field :: (Raw ss1) :: l)
			    end
			| (SOME(#"[", ss')) => let val (cs, ss3) = scanCharSet ss'
			    in
			      scan (ss3, cs :: (Raw ss1) :: l)
			    end
			| (SOME(_, ss')) =>
			    scan (SS.dropl Char.isSpace ss', (Raw ss1) :: l)
			| NONE => rev((Raw ss1)::l)
		      (* end case *)
		    end
	  in
	    scan (SS.all str, [])
	  end

    fun format s = let
	  val fmts = compileFormat s
	  fun doField (flags, wid, ty, arg) = let
		fun padFn s = (case (#ljust flags, wid)
		       of (_, NoPad) => s
			| (false, Wid i) => padLeft(s, i)
			| (true, Wid i) => padRight(s, i)
		      (* end case *))
		fun zeroPadFn (sign, s) = (case wid
		       of NoPad => raise BadFormat
			| (Wid i) => zeroLPad(s, i - (String.size sign))
		      (* end case *))
		fun negate i = ((PosInt(~i)) handle _ => MaxInt)
		fun doSign i = (case (i < 0, #sign flags, #neg_char flags)
		       of (false, AlwaysSign, _) => ("+", PosInt i)
			| (false, BlankSign, _) => (" ", PosInt i)
			| (false, _, _) => ("", PosInt i)
			| (true, _, TildeSign) => ("~", negate i)
			| (true, _, _) => ("-", negate i)
		      (* end case *))
		fun doRealSign sign = (case (sign, #sign flags, #neg_char flags)
		       of (false, AlwaysSign, _) => "+"
			| (false, BlankSign, _) => " "
			| (false, _, _) => ""
			| (true, _, TildeSign) => "~"
			| (true, _, _) => "-"
		      (* end case *))
		fun doExpSign (exp, isCap) = let
		      val e = if isCap then "E" else "e"
		      fun mkExp e = zeroLPad(Int.toString e, 2)
		      in
			case (exp < 0, #neg_char flags)
			 of (false, _) => [e, mkExp exp]
			  | (true, TildeSign) => [e, "~", mkExp(~exp)]
			  | (true, _) => [e, "-", mkExp(~exp)]
			(* end case *)
		      end
		in
		  case (ty, arg)
		   of (OctalField, INT i) => let
		        val (sign, i) = doSign i
		        val sign = if (#base flags) then sign^"0" else sign
		        val s = intToOctal i
		        in
		          if (#zero_pad flags)
			    then sign ^ zeroPadFn(sign, s)
			    else padFn (sign ^ s)
		        end
		    | (IntField, INT i) => let
		        val (sign, i) = doSign i
			val s = intToStr i
		        in
			  if (#zero_pad flags)
			    then sign ^ zeroPadFn(sign, s)
		            else padFn (sign ^ s)
		        end
		    | (HexField, INT i) => let
		        val (sign, i) = doSign i
		        val sign = if (#base flags) then sign^"0x" else sign
		        val s = intToHex i 
		        in
		          if (#zero_pad flags)
			    then sign ^ zeroPadFn(sign, s)
			    else padFn (sign ^ s)
		        end
		    | (CapHexField, INT i) => let
		        val (sign, i) = doSign i
		        val sign = if (#base flags) then sign^"0X" else sign
		        val s = intToHeX i 
		        in
		          if (#zero_pad flags)
			    then sign ^ zeroPadFn(sign, s)
			    else padFn (sign ^ s)
		        end
		    | (CharField, CHR c) => padFn(String.str c)
		    | (BoolField, BOOL false) => padFn "false"
		    | (BoolField, BOOL true) => padFn "true"
		    | (StrField, STR s) => padFn s
		    | (RealField{prec, format=F_Format}, REAL r) => let
		        val {sign, mantissa} = RealFormat.realFFormat(r, prec)
		        val sign = doRealSign sign
		        in
		          if ((prec = 0) andalso (#base flags))
			    then padFn(concat[sign, mantissa, "."])
			    else padFn(sign ^ mantissa)
		        end
		    | (RealField{prec, format=E_Format isCap}, REAL r) => let
		        val {sign, mantissa, exp} = RealFormat.realEFormat(r, prec)
		        val sign = doRealSign sign
		        val expStr = doExpSign(exp, isCap)
		        in
		          if ((prec = 0) andalso (#base flags))
			    then padFn(concat(sign :: mantissa :: "." :: expStr))
			    else padFn(concat(sign :: mantissa :: expStr))
		        end
		    | (RealField{prec, format=G_Format isCap}, REAL r) => let
		        val prec = if (prec = 0) then 1 else prec
		        val {sign, whole, frac, exp} =
			      RealFormat.realGFormat(r, prec)
		        val sign = doRealSign sign
		        val expStr = (case exp
			       of SOME e => doExpSign(e, isCap)
			        | NONE => [])
		        val num = if (#base flags)
			        then let
			          val diff = prec - ((size whole) + (size frac))
			          in
				    if (diff > 0)
				      then zeroRPad(frac, (size frac)+diff)
				      else frac
			          end
			      else if (frac = "")
			        then ""
			        else ("." ^ frac)
		        in
		          padFn(concat(sign::whole::frac::expStr))
		        end
		    | (_, LEFT(w, arg)) => let
		        val flags = {
			        sign = (#sign flags), neg_char = (#neg_char flags),
			        zero_pad = (#zero_pad flags), base = (#base flags),
			        ljust = true
			      }
		        in
			  doField (flags, Wid w, ty, arg)
		        end
		    | (_, RIGHT(w, arg)) => doField (flags, Wid w, ty, arg)
		    | _ => raise BadFmtList
		  (* end case *)
		end
	  fun doArgs ([], [], l) = SS.concat(rev l)
	    | doArgs ((Raw s)::rf, args, l) = doArgs(rf, args, s::l)
	    | doArgs (Field(flags, wid, ty)::rf, arg::ra, l) =
		doArgs (rf, ra, SS.all (doField (flags, wid, ty, arg)) :: l)
	    | doArgs _ = raise BadFmtList
	  in
	    fn args => doArgs(fmts, args, [])
	  end (* format *)

    fun formatf fmt = let
	  val f = format fmt
	  in
	    fn consumer => fn args => consumer(f args)
	  end

(** NOTE: for the time being, this ignores flags and field width **)
    fun scanf fmt getc strm = let
	  val fmts = compileScanFormat fmt
	  val skipWS = SC.dropl Char.isSpace getc
	  fun scan (strm, [], items) = SOME(rev items, strm)
	    | scan (strm, (Raw ss)::rf, items) = let
		fun match (strm, ss) = (case (getc strm, SS.getc ss)
		       of (SOME(c', strm'), SOME(c, ss)) =>
			    if (c' = c) then match (strm', ss) else NONE
			| (_, NONE) => scan (strm, rf, items)
			| _ => NONE
		      (* end case *))
		in
		  match (skipWS strm, ss)
		end
	    | scan (strm, (CharSet pred)::rf, items) = let
		fun scanSet strm = (case (getc strm)
		       of (SOME(c, strm')) =>
			    if (pred c) then scanSet strm' else strm
			| NONE => strm
		    (* end case *))
		in
		  scan (scanSet strm, rf, items)
		end
	    | scan (strm, Field(flags, wid, ty)::rf, items) = (let
		val strm = skipWS strm
		fun getInt fmt = let
		      val SOME(n, strm) = LargestInt.scan fmt getc strm
		      in
			(INT n, strm)
		      end
		val (item, strm) = (case ty
		       of OctalField => getInt SC.OCT
			| IntField => getInt SC.DEC
			| HexField => getInt SC.HEX
			| CapHexField => getInt SC.HEX
			| CharField => let val SOME(c, strm) = getc strm
			    in
			      (CHR c, strm)
			    end
			| BoolField => let
			    val SOME(b, strm) = Bool.scan getc strm
			    in
			      (BOOL b, strm)
			    end
			| StrField => let
			    val notSpace = not o Char.isSpace
			    val pred = (case wid
				   of NoPad => notSpace
				    | (Wid n) => let val cnt = ref n
					in
					  fn c => (case !cnt
					     of 0 => false
					      | n => (cnt := n-1; notSpace c)
					    (* end case *))
					end
				  (* end case *))
			    val (s, strm) = SC.splitl pred getc strm
			    in
			      (STR s, strm)
			    end
			| (RealField _) => let
			    val SOME(r, strm) = LargestFloat.scan getc strm
			    in
			      (REAL r, strm)
			    end
		      (* end case *))
		in
		  scan (strm, rf, item::items)
		end
		  handle Overflow => raise Overflow
		       | _ => NONE)
	  in
	    scan(strm, fmts, [])
	  end (* scanf *)

    fun sscanf fmt = SC.scanString (scanf fmt)

  end (* Format *)
