reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements double-precision soft-float division
// with the IEEE-754 default rounding (to nearest, ties to even).
//
// For simplicity, this implementation currently flushes denormals to zero.
// It should be a fairly straightforward exercise to implement gradual
// underflow with correct rounding.
//
//===----------------------------------------------------------------------===//

#define DOUBLE_PRECISION
#include "fp_lib.h"

COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) {

  const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
  const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
  const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;

  rep_t aSignificand = toRep(a) & significandMask;
  rep_t bSignificand = toRep(b) & significandMask;
  int scale = 0;

  // Detect if a or b is zero, denormal, infinity, or NaN.
  if (aExponent - 1U >= maxExponent - 1U ||
      bExponent - 1U >= maxExponent - 1U) {

    const rep_t aAbs = toRep(a) & absMask;
    const rep_t bAbs = toRep(b) & absMask;

    // NaN / anything = qNaN
    if (aAbs > infRep)
      return fromRep(toRep(a) | quietBit);
    // anything / NaN = qNaN
    if (bAbs > infRep)
      return fromRep(toRep(b) | quietBit);

    if (aAbs == infRep) {
      // infinity / infinity = NaN
      if (bAbs == infRep)
        return fromRep(qnanRep);
      // infinity / anything else = +/- infinity
      else
        return fromRep(aAbs | quotientSign);
    }

    // anything else / infinity = +/- 0
    if (bAbs == infRep)
      return fromRep(quotientSign);

    if (!aAbs) {
      // zero / zero = NaN
      if (!bAbs)
        return fromRep(qnanRep);
      // zero / anything else = +/- zero
      else
        return fromRep(quotientSign);
    }
    // anything else / zero = +/- infinity
    if (!bAbs)
      return fromRep(infRep | quotientSign);

    // One or both of a or b is denormal.  The other (if applicable) is a
    // normal number.  Renormalize one or both of a and b, and set scale to
    // include the necessary exponent adjustment.
    if (aAbs < implicitBit)
      scale += normalize(&aSignificand);
    if (bAbs < implicitBit)
      scale -= normalize(&bSignificand);
  }

  // Set the implicit significand bit.  If we fell through from the
  // denormal path it was already set by normalize( ), but setting it twice
  // won't hurt anything.
  aSignificand |= implicitBit;
  bSignificand |= implicitBit;
  int quotientExponent = aExponent - bExponent + scale;

  // Align the significand of b as a Q31 fixed-point number in the range
  // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
  // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2.  This
  // is accurate to about 3.5 binary digits.
  const uint32_t q31b = bSignificand >> 21;
  uint32_t recip32 = UINT32_C(0x7504f333) - q31b;
  // 0x7504F333 / 2^32 + 1 = 3/4 + 1/sqrt(2)

  // Now refine the reciprocal estimate using a Newton-Raphson iteration:
  //
  //     x1 = x0 * (2 - x0 * b)
  //
  // This doubles the number of correct binary digits in the approximation
  // with each iteration.
  uint32_t correction32;
  correction32 = -((uint64_t)recip32 * q31b >> 32);
  recip32 = (uint64_t)recip32 * correction32 >> 31;
  correction32 = -((uint64_t)recip32 * q31b >> 32);
  recip32 = (uint64_t)recip32 * correction32 >> 31;
  correction32 = -((uint64_t)recip32 * q31b >> 32);
  recip32 = (uint64_t)recip32 * correction32 >> 31;

  // The reciprocal may have overflowed to zero if the upper half of b is
  // exactly 1.0.  This would sabatoge the full-width final stage of the
  // computation that follows, so we adjust the reciprocal down by one bit.
  recip32--;

  // We need to perform one more iteration to get us to 56 binary digits.
  // The last iteration needs to happen with extra precision.
  const uint32_t q63blo = bSignificand << 11;
  uint64_t correction, reciprocal;
  correction = -((uint64_t)recip32 * q31b + ((uint64_t)recip32 * q63blo >> 32));
  uint32_t cHi = correction >> 32;
  uint32_t cLo = correction;
  reciprocal = (uint64_t)recip32 * cHi + ((uint64_t)recip32 * cLo >> 32);

  // Adjust the final 64-bit reciprocal estimate downward to ensure that it is
  // strictly smaller than the infinitely precise exact reciprocal.  Because
  // the computation of the Newton-Raphson step is truncating at every step,
  // this adjustment is small; most of the work is already done.
  reciprocal -= 2;

  // The numerical reciprocal is accurate to within 2^-56, lies in the
  // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
  // of b.  Multiplying a by this reciprocal thus gives a numerical q = a/b
  // in Q53 with the following properties:
  //
  //    1. q < a/b
  //    2. q is in the interval [0.5, 2.0)
  //    3. The error in q is bounded away from 2^-53 (actually, we have a
  //       couple of bits to spare, but this is all we need).

  // We need a 64 x 64 multiply high to compute q, which isn't a basic
  // operation in C, so we need to be a little bit fussy.
  rep_t quotient, quotientLo;
  wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);

  // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
  // In either case, we are going to compute a residual of the form
  //
  //     r = a - q*b
  //
  // We know from the construction of q that r satisfies:
  //
  //     0 <= r < ulp(q)*b
  //
  // If r is greater than 1/2 ulp(q)*b, then q rounds up.  Otherwise, we
  // already have the correct result.  The exact halfway case cannot occur.
  // We also take this time to right shift quotient if it falls in the [1,2)
  // range and adjust the exponent accordingly.
  rep_t residual;
  if (quotient < (implicitBit << 1)) {
    residual = (aSignificand << 53) - quotient * bSignificand;
    quotientExponent--;
  } else {
    quotient >>= 1;
    residual = (aSignificand << 52) - quotient * bSignificand;
  }

  const int writtenExponent = quotientExponent + exponentBias;

  if (writtenExponent >= maxExponent) {
    // If we have overflowed the exponent, return infinity.
    return fromRep(infRep | quotientSign);
  }

  else if (writtenExponent < 1) {
    if (writtenExponent == 0) {
      // Check whether the rounded result is normal.
      const bool round = (residual << 1) > bSignificand;
      // Clear the implicit bit.
      rep_t absResult = quotient & significandMask;
      // Round.
      absResult += round;
      if (absResult & ~significandMask) {
        // The rounded result is normal; return it.
        return fromRep(absResult | quotientSign);
      }
    }
    // Flush denormals to zero.  In the future, it would be nice to add
    // code to round them correctly.
    return fromRep(quotientSign);
  }

  else {
    const bool round = (residual << 1) > bSignificand;
    // Clear the implicit bit.
    rep_t absResult = quotient & significandMask;
    // Insert the exponent.
    absResult |= (rep_t)writtenExponent << significandBits;
    // Round.
    absResult += round;
    // Insert the sign and return.
    const double result = fromRep(absResult | quotientSign);
    return result;
  }
}

#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { return __divdf3(a, b); }
#else
COMPILER_RT_ALIAS(__divdf3, __aeabi_ddiv)
#endif
#endif