// fixed_point.c : utility routines for fixed point computations using native integer types
// Copyright (c) 2005-2007 Garth Zeglin

// This file is part of ArtLPC. 

// ArtLPC is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.

// ArtLPC is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with ArtLPC; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

// ---------------------------------------------------------------------
#include <libstd.h>

// Product of two fixed point values in 0.31 format: a sign bit
// followed by a 31 bit fraction.  The intermediate product is in 1.62
// format and is narrowed back to a 31 bit fraction before returning.

// On a LPC2103 this is substantially slower than a regular 32-bit
// multiplication, but still roughly a factor of four times faster
// than floating point emulation.

// The same routine serves any other pair of fixed point formats that
// happen to need 31 intermediate bits dropped.  Notably, this is true
// for a regular integer (31.0 format) multiplied by a normal fixed
// point value (0.31 format) to yield an integer.  Some quick rules to
// remember about the scaling:
//  fixed_mul( fixed, fixed )  is fixed    0.31 * 0.31 -> 0.31
//  fixed_mul( fixed, int   )  is int      0.31 * 31.0 -> 31.0
//             fixed * int     is fixed    0.31 * 31.0 -> 0.31  (with overflow potential)

fixed_point_t fixed_mul( fixed_point_t i1, fixed_point_t i2 )
{
  // Widen both operands to long long before multiplying.
  const long long wide1 = i1;
  const long long wide2 = i2;

  // Drop the low 31 bits of the wide product.
  return (fixed_point_t) ((wide1 * wide2) >> 31);
}

// Sum of two fixed point numbers that share the same precision.
// Ordinary integer addition is all that is needed; the result is not
// checked for overflow.
fixed_point_t fixed_add( fixed_point_t i1, fixed_point_t i2 )
{
  const fixed_point_t sum = i1 + i2;
  return sum;
}

// Quotient of two 0.31 fixed point values.  The quotient of a plain
// 0.31/0.31 integer division is in 31.0 format, so the numerator is
// scaled up by 2^31 before the division to produce a 0.31 result with
// precision preserved.
//
// i2 must be non-zero; no check is performed.  The quotient is
// narrowed to fixed_point_t without any overflow check.
fixed_point_t fixed_div( fixed_point_t i1, fixed_point_t i2 )
{
  // Scale by multiplying rather than by left shifting: a left shift of
  // a negative value is undefined behavior in C, whereas this product
  // is well defined for any numerator that fits in 32 bits.
  long long numerator = (long long) i1 * (1LL << 31);

  return (fixed_point_t) (numerator / (long long) i2);
}

/****************************************************************/
// Fixed point multiplication for mantissa lengths m0 and m1,
// producing a fixed point mantissa length m2.  The intermediate value
// has a mantissa of m0+m1.

// This is only about 40% faster than using the floating point
// emulation on a LPC2103.

// The shift m0+m1-m2 is applied to a long long, so its magnitude must
// be smaller than the width of long long.  No overflow check is made
// when the result is narrowed to fixed_point_t.

fixed_point_t fixed_mul_n( fixed_point_t i1, fixed_point_t i2, int m0, int m1, int m2 )
{
  long long accum = (long long) i1 * (long long) i2;
  int shift = m0+m1-m2;

  // The product has at least as many mantissa bits as the output:
  // drop the surplus (a shift of zero leaves the value unchanged).
  if ( shift >= 0 ) return (fixed_point_t) (accum >> shift);

  // The product has too few mantissa bits: scale it up.  The shift is
  // performed on the unsigned representation because left shifting a
  // negative signed value is undefined behavior in C.  Converting back
  // to the signed result type is implementation-defined (GCC documents
  // it as reduction modulo 2^N).
  return (fixed_point_t) ((unsigned long long) accum << -shift);
}

/****************************************************************/
// Fixed point division for mantissa lengths m0 and m1, producing a
// fixed point mantissa length m2.
//
// i2 must be non-zero; no check is performed.  The output shift
// m0+31-m1-m2 is applied to a long long, so its magnitude must be
// smaller than the width of long long.  No overflow check is made
// when the result is narrowed to fixed_point_t.

fixed_point_t fixed_div_n( fixed_point_t i1, fixed_point_t i2, int m0, int m1, int m2 )
{
  // First compute the division at maximum precision by scaling the
  // numerator up by 2^31.  The intermediate quotient then has this
  // format:
  //   ().(m0+31) / ().(m1) -> ().(m0+31-m1)
  // The scaling is written as a multiplication because left shifting
  // a negative value is undefined behavior in C.
  long long accum = ((long long) i1 * (1LL << 31)) / (long long) i2;

  // Compute the shift to the output format.
  int shift = m0+31-m1 - m2;

  // Surplus mantissa bits (or none): drop them.
  if ( shift >= 0 ) return (fixed_point_t) (accum >> shift);

  // Too few mantissa bits: scale up on the unsigned representation so
  // that a negative quotient is not left shifted as a signed value.
  // Converting back to the signed result type is
  // implementation-defined (GCC documents it as reduction modulo 2^N).
  return (fixed_point_t) ((unsigned long long) accum << -shift);
}
/****************************************************************/
// Fixed point addition for mantissa lengths m0 and m1, producing a
// fixed point mantissa length m2.
//
// m0, m1 and m2 must not exceed 31: each one is subtracted from 31 to
// form a shift count, which must not be negative.  No overflow check
// is made when the result is narrowed to fixed_point_t.

fixed_point_t fixed_add_n( fixed_point_t i0, fixed_point_t i1, int m0, int m1, int m2 )
{
  // Align the addends to an intermediate 32.31 precision before adding.
  // The alignment is written as a multiplication by a power of two
  // because left shifting a negative addend is undefined behavior in C.
  long long aligned0 = (long long) i0 * (1LL << (31-m0));
  long long aligned1 = (long long) i1 * (1LL << (31-m1));
  long long accum = aligned0 + aligned1;

  // Then downshift to the output precision.
  return (fixed_point_t) (accum >> (31-m2));
}

/****************************************************************/
// Fixed point multiplication using only 32-bit arithmetic for
// mantissa lengths m0 and m1, producing a fixed point mantissa length
// m2.  The intermediate value would normally have a mantissa of
// m0+m1, so the source values are prescaled to avoid overflow.

// This is only about 25% faster than fixed_mul_n at a substantial
// penalty in accuracy.  However, a hand-optimized 32-bit multiply
// sequence with constant shift operations can be roughly 16 times
// faster than fixed_mul_n.

// NOTE(review): the left shifts below operate on values that may be
// negative, which ISO C leaves undefined.  They are kept as plain
// shifts because the width of fixed_point_t is not visible here.

fixed_point_t fast_fixed_mul_n( fixed_point_t i1, fixed_point_t i2, int m0, int m1, int m2 )
{
  // Number of mantissa bits by which the full product exceeds the
  // output format.
  int excess = m0 + m1 - m2;

  if (excess <= 0) {
    // The product mantissa fits within the output format: multiply
    // directly, then scale up if the product is short of m2 bits.
    fixed_point_t accum = i1*i2;
    if (excess < 0) return (accum << -excess);
    return accum;
  } else {
    // Otherwise prescale both multiplicands by half of the excess,
    // rounded up.
    int prescale = (excess + 1)/2;
    fixed_point_t accum = (i1 >> prescale) * (i2 >> prescale);

    // With an odd excess the two prescales together remove one bit
    // more than required, so the product is one bit short and is
    // shifted back up.  The product never retains surplus mantissa
    // bits at this point, so no right shift is ever needed.
    if (excess % 2 != 0) return (accum << 1);
    return accum;
  }
}
/****************************************************************/
