|
@@ -1866,3 +1866,98 @@ float64 float64_round_to_int( struct roundingData *roundData, float64 a )
|
|
|
int16 aExp;
|
|
|
bits64 lastBitMask, roundBitsMask;
|
|
|
int8 roundingMode;
|
|
|
+ float64 z;
|
|
|
+
|
|
|
+ aExp = extractFloat64Exp( a );
|
|
|
+ if ( 0x433 <= aExp ) {
|
|
|
+ if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
|
|
|
+ return propagateFloat64NaN( a, a );
|
|
|
+ }
|
|
|
+ return a;
|
|
|
+ }
|
|
|
+ if ( aExp <= 0x3FE ) {
|
|
|
+ if ( (bits64) ( a<<1 ) == 0 ) return a;
|
|
|
+ roundData->exception |= float_flag_inexact;
|
|
|
+ aSign = extractFloat64Sign( a );
|
|
|
+ switch ( roundData->mode ) {
|
|
|
+ case float_round_nearest_even:
|
|
|
+ if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
|
|
|
+ return packFloat64( aSign, 0x3FF, 0 );
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case float_round_down:
|
|
|
+ return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
|
|
|
+ case float_round_up:
|
|
|
+ return
|
|
|
+ aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
|
|
|
+ }
|
|
|
+ return packFloat64( aSign, 0, 0 );
|
|
|
+ }
|
|
|
+ lastBitMask = 1;
|
|
|
+ lastBitMask <<= 0x433 - aExp;
|
|
|
+ roundBitsMask = lastBitMask - 1;
|
|
|
+ z = a;
|
|
|
+ roundingMode = roundData->mode;
|
|
|
+ if ( roundingMode == float_round_nearest_even ) {
|
|
|
+ z += lastBitMask>>1;
|
|
|
+ if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
|
|
|
+ }
|
|
|
+ else if ( roundingMode != float_round_to_zero ) {
|
|
|
+ if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
|
|
|
+ z += roundBitsMask;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ z &= ~ roundBitsMask;
|
|
|
+ if ( z != a ) roundData->exception |= float_flag_inexact;
|
|
|
+ return z;
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of adding the absolute values of the double-precision
|
|
|
+floating-point values `a' and `b'. If `zSign' is true, the sum is negated
|
|
|
+before being returned. `zSign' is ignored if the result is a NaN. The
|
|
|
+addition is performed according to the IEC/IEEE Standard for Binary
|
|
|
+Floating-point Arithmetic.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
|
|
|
+{
|
|
|
+ int16 aExp, bExp, zExp;
|
|
|
+ bits64 aSig, bSig, zSig;
|
|
|
+ int16 expDiff;
|
|
|
+
|
|
|
+ aSig = extractFloat64Frac( a );
|
|
|
+ aExp = extractFloat64Exp( a );
|
|
|
+ bSig = extractFloat64Frac( b );
|
|
|
+ bExp = extractFloat64Exp( b );
|
|
|
+ expDiff = aExp - bExp;
|
|
|
+ aSig <<= 9;
|
|
|
+ bSig <<= 9;
|
|
|
+ if ( 0 < expDiff ) {
|
|
|
+ if ( aExp == 0x7FF ) {
|
|
|
+ if ( aSig ) return propagateFloat64NaN( a, b );
|
|
|
+ return a;
|
|
|
+ }
|
|
|
+ if ( bExp == 0 ) {
|
|
|
+ --expDiff;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ bSig |= LIT64( 0x2000000000000000 );
|
|
|
+ }
|
|
|
+ shift64RightJamming( bSig, expDiff, &bSig );
|
|
|
+ zExp = aExp;
|
|
|
+ }
|
|
|
+ else if ( expDiff < 0 ) {
|
|
|
+ if ( bExp == 0x7FF ) {
|
|
|
+ if ( bSig ) return propagateFloat64NaN( a, b );
|
|
|
+ return packFloat64( zSign, 0x7FF, 0 );
|
|
|
+ }
|
|
|
+ if ( aExp == 0 ) {
|
|
|
+ ++expDiff;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ aSig |= LIT64( 0x2000000000000000 );
|
|
|
+ }
|
|
|
+ shift64RightJamming( aSig, - expDiff, &aSig );
|