|
@@ -1691,3 +1691,108 @@ int32 float64_to_int32_round_to_zero( float64 a )
|
|
|
if ( shiftCount < 21 ) {
|
|
|
if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
|
|
|
goto invalid;
|
|
|
+ }
|
|
|
+ else if ( 52 < shiftCount ) {
|
|
|
+ if ( aExp || aSig ) float_raise( float_flag_inexact );
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ aSig |= LIT64( 0x0010000000000000 );
|
|
|
+ savedASig = aSig;
|
|
|
+ aSig >>= shiftCount;
|
|
|
+ z = aSig;
|
|
|
+ if ( aSign ) z = - z;
|
|
|
+ if ( ( z < 0 ) ^ aSign ) {
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ return aSign ? 0x80000000 : 0x7FFFFFFF;
|
|
|
+ }
|
|
|
+ if ( ( aSig<<shiftCount ) != savedASig ) {
|
|
|
+ float_raise( float_flag_inexact );
|
|
|
+ }
|
|
|
+ return z;
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of converting the double-precision floating-point value
|
|
|
+`a' to the 32-bit two's complement unsigned integer format. The conversion
|
|
|
+is performed according to the IEC/IEEE Standard for Binary Floating-point
|
|
|
+Arithmetic---which means in particular that the conversion is rounded
|
|
|
+according to the current rounding mode. If `a' is a NaN, the largest
|
|
|
+positive integer is returned. Otherwise, if the conversion overflows, the
|
|
|
+largest positive integer is returned.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+int32 float64_to_uint32( struct roundingData *roundData, float64 a )
|
|
|
+{
|
|
|
+ flag aSign;
|
|
|
+ int16 aExp, shiftCount;
|
|
|
+ bits64 aSig;
|
|
|
+
|
|
|
+ aSig = extractFloat64Frac( a );
|
|
|
+ aExp = extractFloat64Exp( a );
|
|
|
+ aSign = 0; //extractFloat64Sign( a );
|
|
|
+ //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
|
|
|
+ if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
|
|
|
+ shiftCount = 0x42C - aExp;
|
|
|
+ if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
|
|
|
+ return roundAndPackInt32( roundData, aSign, aSig );
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of converting the double-precision floating-point value
|
|
|
+`a' to the 32-bit two's complement integer format. The conversion is
|
|
|
+performed according to the IEC/IEEE Standard for Binary Floating-point
|
|
|
+Arithmetic, except that the conversion is always rounded toward zero. If
|
|
|
+`a' is a NaN, the largest positive integer is returned. Otherwise, if the
|
|
|
+conversion overflows, the largest positive integer is returned.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+int32 float64_to_uint32_round_to_zero( float64 a )
|
|
|
+{
|
|
|
+ flag aSign;
|
|
|
+ int16 aExp, shiftCount;
|
|
|
+ bits64 aSig, savedASig;
|
|
|
+ int32 z;
|
|
|
+
|
|
|
+ aSig = extractFloat64Frac( a );
|
|
|
+ aExp = extractFloat64Exp( a );
|
|
|
+ aSign = extractFloat64Sign( a );
|
|
|
+ shiftCount = 0x433 - aExp;
|
|
|
+ if ( shiftCount < 21 ) {
|
|
|
+ if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
|
|
|
+ goto invalid;
|
|
|
+ }
|
|
|
+ else if ( 52 < shiftCount ) {
|
|
|
+ if ( aExp || aSig ) float_raise( float_flag_inexact );
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ aSig |= LIT64( 0x0010000000000000 );
|
|
|
+ savedASig = aSig;
|
|
|
+ aSig >>= shiftCount;
|
|
|
+ z = aSig;
|
|
|
+ if ( aSign ) z = - z;
|
|
|
+ if ( ( z < 0 ) ^ aSign ) {
|
|
|
+ invalid:
|
|
|
+ float_raise( float_flag_invalid );
|
|
|
+ return aSign ? 0x80000000 : 0x7FFFFFFF;
|
|
|
+ }
|
|
|
+ if ( ( aSig<<shiftCount ) != savedASig ) {
|
|
|
+ float_raise( float_flag_inexact );
|
|
|
+ }
|
|
|
+ return z;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Returns the result of converting the double-precision floating-point value
|
|
|
+`a' to the single-precision floating-point format. The conversion is
|
|
|
+performed according to the IEC/IEEE Standard for Binary Floating-point
|
|
|
+Arithmetic.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+float32 float64_to_float32( struct roundingData *roundData, float64 a )
|
|
|
+{
|
|
|
+ flag aSign;
|