|
@@ -1961,3 +1961,201 @@ static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float6
|
|
|
aSig |= LIT64( 0x2000000000000000 );
|
|
|
}
|
|
|
shift64RightJamming( aSig, - expDiff, &aSig );
|
|
|
+ zExp = bExp;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ if ( aExp == 0x7FF ) {
|
|
|
+ if ( aSig | bSig ) return propagateFloat64NaN( a, b );
|
|
|
+ return a;
|
|
|
+ }
|
|
|
+ if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
|
|
|
+ zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
|
|
|
+ zExp = aExp;
|
|
|
+ goto roundAndPack;
|
|
|
+ }
|
|
|
+ aSig |= LIT64( 0x2000000000000000 );
|
|
|
+ zSig = ( aSig + bSig )<<1;
|
|
|
+ --zExp;
|
|
|
+ if ( (sbits64) zSig < 0 ) {
|
|
|
+ zSig = aSig + bSig;
|
|
|
+ ++zExp;
|
|
|
+ }
|
|
|
+ roundAndPack:
|
|
|
+ return roundAndPackFloat64( roundData, zSign, zExp, zSig );
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of subtracting the absolute values of the double-
|
|
|
+precision floating-point values `a' and `b'. If `zSign' is true, the
|
|
|
+difference is negated before being returned. `zSign' is ignored if the
|
|
|
+result is a NaN. The subtraction is performed according to the IEC/IEEE
|
|
|
+Standard for Binary Floating-point Arithmetic.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
|
|
|
+{
|
|
|
+ int16 aExp, bExp, zExp;
|
|
|
+ bits64 aSig, bSig, zSig;
|
|
|
+ int16 expDiff;
|
|
|
+
|
|
|
+ aSig = extractFloat64Frac( a );
|
|
|
+ aExp = extractFloat64Exp( a );
|
|
|
+ bSig = extractFloat64Frac( b );
|
|
|
+ bExp = extractFloat64Exp( b );
|
|
|
+ expDiff = aExp - bExp;
|
|
|
+ aSig <<= 10;
|
|
|
+ bSig <<= 10;
|
|
|
+ if ( 0 < expDiff ) goto aExpBigger;
|
|
|
+ if ( expDiff < 0 ) goto bExpBigger;
|
|
|
+ if ( aExp == 0x7FF ) {
|
|
|
+ if ( aSig | bSig ) return propagateFloat64NaN( a, b );
|
|
|
+ roundData->exception |= float_flag_invalid;
|
|
|
+ return float64_default_nan;
|
|
|
+ }
|
|
|
+ if ( aExp == 0 ) {
|
|
|
+ aExp = 1;
|
|
|
+ bExp = 1;
|
|
|
+ }
|
|
|
+ if ( bSig < aSig ) goto aBigger;
|
|
|
+ if ( aSig < bSig ) goto bBigger;
|
|
|
+ return packFloat64( roundData->mode == float_round_down, 0, 0 );
|
|
|
+ bExpBigger:
|
|
|
+ if ( bExp == 0x7FF ) {
|
|
|
+ if ( bSig ) return propagateFloat64NaN( a, b );
|
|
|
+ return packFloat64( zSign ^ 1, 0x7FF, 0 );
|
|
|
+ }
|
|
|
+ if ( aExp == 0 ) {
|
|
|
+ ++expDiff;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ aSig |= LIT64( 0x4000000000000000 );
|
|
|
+ }
|
|
|
+ shift64RightJamming( aSig, - expDiff, &aSig );
|
|
|
+ bSig |= LIT64( 0x4000000000000000 );
|
|
|
+ bBigger:
|
|
|
+ zSig = bSig - aSig;
|
|
|
+ zExp = bExp;
|
|
|
+ zSign ^= 1;
|
|
|
+ goto normalizeRoundAndPack;
|
|
|
+ aExpBigger:
|
|
|
+ if ( aExp == 0x7FF ) {
|
|
|
+ if ( aSig ) return propagateFloat64NaN( a, b );
|
|
|
+ return a;
|
|
|
+ }
|
|
|
+ if ( bExp == 0 ) {
|
|
|
+ --expDiff;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ bSig |= LIT64( 0x4000000000000000 );
|
|
|
+ }
|
|
|
+ shift64RightJamming( bSig, expDiff, &bSig );
|
|
|
+ aSig |= LIT64( 0x4000000000000000 );
|
|
|
+ aBigger:
|
|
|
+ zSig = aSig - bSig;
|
|
|
+ zExp = aExp;
|
|
|
+ normalizeRoundAndPack:
|
|
|
+ --zExp;
|
|
|
+ return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of adding the double-precision floating-point values `a'
|
|
|
+and `b'. The operation is performed according to the IEC/IEEE Standard for
|
|
|
+Binary Floating-point Arithmetic.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
|
|
|
+{
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ aSign = extractFloat64Sign( a );
|
|
|
+ bSign = extractFloat64Sign( b );
|
|
|
+ if ( aSign == bSign ) {
|
|
|
+ return addFloat64Sigs( roundData, a, b, aSign );
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ return subFloat64Sigs( roundData, a, b, aSign );
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of subtracting the double-precision floating-point values
|
|
|
+`a' and `b'. The operation is performed according to the IEC/IEEE Standard
|
|
|
+for Binary Floating-point Arithmetic.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
|
|
|
+{
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ aSign = extractFloat64Sign( a );
|
|
|
+ bSign = extractFloat64Sign( b );
|
|
|
+ if ( aSign == bSign ) {
|
|
|
+ return subFloat64Sigs( roundData, a, b, aSign );
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ return addFloat64Sigs( roundData, a, b, aSign );
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of multiplying the double-precision floating-point values
|
|
|
+`a' and `b'. The operation is performed according to the IEC/IEEE Standard
|
|
|
+for Binary Floating-point Arithmetic.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
|
|
|
+{
|
|
|
+ flag aSign, bSign, zSign;
|
|
|
+ int16 aExp, bExp, zExp;
|
|
|
+ bits64 aSig, bSig, zSig0, zSig1;
|
|
|
+
|
|
|
+ aSig = extractFloat64Frac( a );
|
|
|
+ aExp = extractFloat64Exp( a );
|
|
|
+ aSign = extractFloat64Sign( a );
|
|
|
+ bSig = extractFloat64Frac( b );
|
|
|
+ bExp = extractFloat64Exp( b );
|
|
|
+ bSign = extractFloat64Sign( b );
|
|
|
+ zSign = aSign ^ bSign;
|
|
|
+ if ( aExp == 0x7FF ) {
|
|
|
+ if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
|
|
|
+ return propagateFloat64NaN( a, b );
|
|
|
+ }
|
|
|
+ if ( ( bExp | bSig ) == 0 ) {
|
|
|
+ roundData->exception |= float_flag_invalid;
|
|
|
+ return float64_default_nan;
|
|
|
+ }
|
|
|
+ return packFloat64( zSign, 0x7FF, 0 );
|
|
|
+ }
|
|
|
+ if ( bExp == 0x7FF ) {
|
|
|
+ if ( bSig ) return propagateFloat64NaN( a, b );
|
|
|
+ if ( ( aExp | aSig ) == 0 ) {
|
|
|
+ roundData->exception |= float_flag_invalid;
|
|
|
+ return float64_default_nan;
|
|
|
+ }
|
|
|
+ return packFloat64( zSign, 0x7FF, 0 );
|
|
|
+ }
|
|
|
+ if ( aExp == 0 ) {
|
|
|
+ if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
|
|
|
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
|
|
|
+ }
|
|
|
+ if ( bExp == 0 ) {
|
|
|
+ if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
|
|
|
+ normalizeFloat64Subnormal( bSig, &bExp, &bSig );
|
|
|
+ }
|
|
|
+ zExp = aExp + bExp - 0x3FF;
|
|
|
+ aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
|
|
|
+ bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
|
|
|
+ mul64To128( aSig, bSig, &zSig0, &zSig1 );
|
|
|
+ zSig0 |= ( zSig1 != 0 );
|
|
|
+ if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
|
|
|
+ zSig0 <<= 1;
|
|
|
+ --zExp;
|