|
@@ -229,3 +229,171 @@ static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign,
|
|
|
roundIncrement = 0;
|
|
|
}
|
|
|
else {
|
|
|
+ roundIncrement = 0x7F;
|
|
|
+ if ( zSign ) {
|
|
|
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ roundBits = zSig & 0x7F;
|
|
|
+ if ( 0xFD <= (bits16) zExp ) {
|
|
|
+ if ( ( 0xFD < zExp )
|
|
|
+ || ( ( zExp == 0xFD )
|
|
|
+ && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
|
|
|
+ ) {
|
|
|
+ roundData->exception |= float_flag_overflow | float_flag_inexact;
|
|
|
+ return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
|
|
|
+ }
|
|
|
+ if ( zExp < 0 ) {
|
|
|
+ isTiny =
|
|
|
+ ( float_detect_tininess == float_tininess_before_rounding )
|
|
|
+ || ( zExp < -1 )
|
|
|
+ || ( zSig + roundIncrement < 0x80000000 );
|
|
|
+ shift32RightJamming( zSig, - zExp, &zSig );
|
|
|
+ zExp = 0;
|
|
|
+ roundBits = zSig & 0x7F;
|
|
|
+ if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if ( roundBits ) roundData->exception |= float_flag_inexact;
|
|
|
+ zSig = ( zSig + roundIncrement )>>7;
|
|
|
+ zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
|
|
|
+ if ( zSig == 0 ) zExp = 0;
|
|
|
+ return packFloat32( zSign, zExp, zSig );
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
+-------------------------------------------------------------------------------
+Normalizing front end for `roundAndPackFloat32'.  Takes an abstract
+floating-point value with sign `zSign', exponent `zExp', and significand
+`zSig', where `zSig' need not be normalized, and returns the corresponding
+single-precision floating-point value.  The significand is shifted left by
+one less than the count reported by `countLeadingZeros32', the exponent is
+reduced by the same amount, and the adjusted value is passed on to
+`roundAndPackFloat32' together with `roundData'.  As with that routine,
+`zExp' must be 1 less than the ``true'' floating-point exponent.
+    NOTE(review): if `countLeadingZeros32' reports 0 (bit 31 of `zSig' set),
+the computed shift count is -1; callers presumably never pass such a
+significand -- confirm against the call sites.
+-------------------------------------------------------------------------------
+*/
+static float32
+ normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
+{
+    /* Left shift that moves the leading set bit of zSig to bit 30. */
+    int8 normShift = countLeadingZeros32( zSig ) - 1;
+    /* Compensate the exponent for the bits gained by the shift. */
+    int16 normExp = zExp - normShift;
+    bits32 normSig = zSig<<normShift;
+
+    return roundAndPackFloat32( roundData, zSign, normExp, normSig );
+
+}
|
|
|
+
|
|
|
+/*
+-------------------------------------------------------------------------------
+Returns the fraction field of the double-precision floating-point value `a',
+i.e. the low-order 52 bits of its encoding with all higher bits cleared.
+-------------------------------------------------------------------------------
+*/
+INLINE bits64 extractFloat64Frac( float64 a )
+{
+    /* Mask selecting bits 51..0 of the encoding. */
+    const bits64 fracMask = LIT64( 0x000FFFFFFFFFFFFF );
+
+    return a & fracMask;
+
+}
|
|
|
+
|
|
|
+/*
+-------------------------------------------------------------------------------
+Returns the exponent field of the double-precision floating-point value `a':
+the 11 bits found at bit positions 62..52 of its encoding.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat64Exp( float64 a )
+{
+    /* Discard the 52 fraction bits so the exponent sits at bit 0. */
+    bits64 upperBits = a>>52;
+
+    /* Keep the 11 exponent bits and drop the sign bit above them. */
+    return upperBits & 0x7FF;
+
+}
|
|
|
+
|
|
|
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit (bit 63 of the encoding) of the double-precision
+floating-point value `a'.  This copy is compiled out; per the note on the
+`#if' line, the definition lives in softfloat.h.
+-------------------------------------------------------------------------------
+*/
+#if 0 /* in softfloat.h */
+INLINE flag extractFloat64Sign( float64 a )
+{
+    /* Shift the top bit down to bit 0. */
+    flag signBit = a>>63;
+
+    return signBit;
+
+}
+#endif
|
|
|
+
|
|
|
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal double-precision floating-point value whose
+denormalized significand is `aSig'.  The significand is shifted left by 11
+less than the count reported by `countLeadingZeros64'; the shifted
+significand is stored through `zSigPtr' and the matching exponent, 1 minus
+the shift distance, is stored through `zExpPtr'.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+{
+    /* Left shift that brings the leading set bit of aSig up to bit 52. */
+    int8 normShift = countLeadingZeros64( aSig ) - 11;
+
+    *zSigPtr = aSig<<normShift;
+    /* Each position shifted lowers the exponent by one, starting from 1. */
+    *zExpPtr = 1 - normShift;
+
+}
|
|
|
+
|
|
|
+/*
+-------------------------------------------------------------------------------
+Assembles a double-precision floating-point value from the sign `zSign',
+the exponent `zExp', and the significand `zSig'.  The sign is moved to bit
+63 and the exponent to bit 52, and the three pieces are then summed rather
+than OR-ed, so any integer portion of `zSig' carries into the exponent
+field.  Because a properly normalized significand has an integer portion of
+1, `zExp' should be 1 less than the desired result exponent whenever `zSig'
+is a complete, normalized significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+    bits64 signField = ( (bits64) zSign )<<63;
+    bits64 expField = ( (bits64) zExp )<<52;
+
+    /* Addition is deliberate: a carry out of zSig bumps the exponent. */
+    return signField + expField + zSig;
+
+}
|
|
|
+
|
|
|
+/*
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
|
|
+and significand `zSig', and returns the proper double-precision floating-
|
|
|
+point value corresponding to the abstract input. Ordinarily, the abstract
|
|
|
+value is simply rounded and packed into the double-precision format, with
|
|
|
+the inexact exception raised if the abstract input cannot be represented
|
|
|
+exactly. If the abstract value is too large, however, the overflow and
|
|
|
+inexact exceptions are raised and an infinity or maximal finite value is
|
|
|
+returned. If the abstract value is too small, the input value is rounded to
|
|
|
+a subnormal number, and the underflow and inexact exceptions are raised if
|
|
|
+the abstract input cannot be represented exactly as a subnormal double-
|
|
|
+precision floating-point number.
|
|
|
+ The input significand `zSig' has its binary point between bits 62
|
|
|
+and 61, which is 10 bits to the left of the usual location. This shifted
|
|
|
+significand must be normalized or smaller. If `zSig' is not normalized,
|
|
|
+`zExp' must be 0; in that case, the result returned is a subnormal number,
|
|
|
+and it must not require rounding. In the usual case that `zSig' is
|
|
|
+normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
|
|
|
+The handling of underflow and overflow follows the IEC/IEEE Standard for
|
|
|
+Binary Floating-point Arithmetic.
|
|
|
+-------------------------------------------------------------------------------
|
|
|
+*/
|
|
|
+static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
|
|
|
+{
|
|
|
+ int8 roundingMode;
|
|
|
+ flag roundNearestEven;
|
|
|
+ int16 roundIncrement, roundBits;
|
|
|
+ flag isTiny;
|
|
|
+
|
|
|
+ roundingMode = roundData->mode;
|
|
|
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
|
|
|
+ roundIncrement = 0x200;
|
|
|
+ if ( ! roundNearestEven ) {
|
|
|
+ if ( roundingMode == float_round_to_zero ) {
|