|
@@ -2493,3 +2493,136 @@ flag float64_le_quiet( float64 a, float64 b )
|
|
|
/* Do nothing, even if NaN as we're quiet */
|
|
|
return 0;
|
|
|
}
|
|
|
+ aSign = extractFloat64Sign( a );
|
|
|
+ bSign = extractFloat64Sign( b );
|
|
|
+ if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
|
|
|
+ return ( a == b ) || ( aSign ^ ( a < b ) );
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns 1 if the double-precision floating-point value `a' is less than
|
|
|
+the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
|
|
|
+exception. Otherwise, the comparison is performed according to the IEC/IEEE
|
|
|
+Standard for Binary Floating-point Arithmetic.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+flag float64_lt_quiet( float64 a, float64 b )
|
|
|
+{
|
|
|
+ flag aSign, bSign;
|
|
|
+
|
|
|
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
|
|
|
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
|
|
|
+ ) {
|
|
|
+ /* Do nothing, even if NaN as we're quiet */
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ aSign = extractFloat64Sign( a );
|
|
|
+ bSign = extractFloat64Sign( b );
|
|
|
+ if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
|
|
|
+ return ( a != b ) && ( aSign ^ ( a < b ) );
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+#ifdef FLOATX80
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of converting the extended double-precision floating-
|
|
|
+point value `a' to the 32-bit two's complement integer format. The
|
|
|
+conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+Floating-point Arithmetic---which means in particular that the conversion
|
|
|
+is rounded according to the current rounding mode. If `a' is a NaN, the
|
|
|
+largest positive integer is returned. Otherwise, if the conversion
|
|
|
+overflows, the largest integer with the same sign as `a' is returned.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
|
|
|
+{
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig;
|
|
|
+
|
|
|
+ aSig = extractFloatx80Frac( a );
|
|
|
+ aExp = extractFloatx80Exp( a );
|
|
|
+ aSign = extractFloatx80Sign( a );
|
|
|
+ if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
|
|
|
+ shiftCount = 0x4037 - aExp;
|
|
|
+ if ( shiftCount <= 0 ) shiftCount = 1;
|
|
|
+ shift64RightJamming( aSig, shiftCount, &aSig );
|
|
|
+ return roundAndPackInt32( roundData, aSign, aSig );
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of converting the extended double-precision floating-
|
|
|
+point value `a' to the 32-bit two's complement integer format. The
|
|
|
+conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+Floating-point Arithmetic, except that the conversion is always rounded
|
|
|
+toward zero. If `a' is a NaN, the largest positive integer is returned.
|
|
|
+Otherwise, if the conversion overflows, the largest integer with the same
|
|
|
+sign as `a' is returned.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+int32 floatx80_to_int32_round_to_zero( floatx80 a )
|
|
|
+{
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp, shiftCount;
|
|
|
+ bits64 aSig, savedASig;
|
|
|
+ int32 z;
|
|
|
+
|
|
|
+ aSig = extractFloatx80Frac( a );
|
|
|
+ aExp = extractFloatx80Exp( a );
|
|
|
+ aSign = extractFloatx80Sign( a );
|
|
|
+ shiftCount = 0x403E - aExp;
|
|
|
+ if ( shiftCount < 32 ) {
|
|
|
+ if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
|
|
|
+ goto invalid;
|
|
|
+ }
|
|
|
+ else if ( 63 < shiftCount ) {
|
|
|
+ if ( aExp || aSig ) float_raise( float_flag_inexact );
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ savedASig = aSig;
|
|
|
+ aSig >>= shiftCount;
|
|
|
+ z = aSig;
|
|
|
+ if ( aSign ) z = - z;
|
|
|
+ if ( ( z < 0 ) ^ aSign ) {
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ return aSign ? 0x80000000 : 0x7FFFFFFF;
|
|
|
+ }
|
|
|
+ if ( ( aSig<<shiftCount ) != savedASig ) {
|
|
|
+ float_raise( float_flag_inexact );
|
|
|
+ }
|
|
|
+ return z;
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of converting the extended double-precision floating-
|
|
|
+point value `a' to the single-precision floating-point format. The
|
|
|
+conversion is performed according to the IEC/IEEE Standard for Binary
|
|
|
+Floating-point Arithmetic.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
|
|
|
+{
|
|
|
+ flag aSign;
|
|
|
+ int32 aExp;
|
|
|
+ bits64 aSig;
|
|
|
+
|
|
|
+ aSig = extractFloatx80Frac( a );
|
|
|
+ aExp = extractFloatx80Exp( a );
|
|
|
+ aSign = extractFloatx80Sign( a );
|
|
|
+ if ( aExp == 0x7FFF ) {
|
|
|
+ if ( (bits64) ( aSig<<1 ) ) {
|
|
|
+ return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
|
|
|
+ }
|
|
|
+ return packFloat32( aSign, 0xFF, 0 );
|
|
|
+ }
|
|
|
+ shift64RightJamming( aSig, 33, &aSig );
|
|
|
+ if ( aExp || aSig ) aExp -= 0x3F81;
|