You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

8467 lines
235 KiB

/*
* Tiny arbitrary precision floating point library
*
* Copyright (c) 2017-2021 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
#include <assert.h>
#ifdef __AVX2__
#include <immintrin.h>
#endif
#include "cutils.h"
#include "libbf.h"
/* enable it to check the multiplication result */
//#define USE_MUL_CHECK
/* enable it to use FFT/NTT multiplication */
#define USE_FFT_MUL
/* enable decimal floating point support */
#define USE_BF_DEC
//#define inline __attribute__((always_inline))
#ifdef __AVX2__
#define FFT_MUL_THRESHOLD 100 /* in limbs of the smallest factor */
#else
#define FFT_MUL_THRESHOLD 100 /* in limbs of the smallest factor */
#endif
/* XXX: adjust */
#define DIVNORM_LARGE_THRESHOLD 50
#define UDIV1NORM_THRESHOLD 3
#if LIMB_BITS == 64
#define FMT_LIMB1 "%" PRIx64
#define FMT_LIMB "%016" PRIx64
#define PRId_LIMB PRId64
#define PRIu_LIMB PRIu64
#else
#define FMT_LIMB1 "%x"
#define FMT_LIMB "%08x"
#define PRId_LIMB "d"
#define PRIu_LIMB "u"
#endif
typedef intptr_t mp_size_t;
typedef int bf_op2_func_t(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags);
#ifdef USE_FFT_MUL
#define FFT_MUL_R_OVERLAP_A (1 << 0)
#define FFT_MUL_R_OVERLAP_B (1 << 1)
#define FFT_MUL_R_NORESIZE (1 << 2)
static no_inline int fft_mul(bf_context_t *s,
bf_t *res, limb_t *a_tab, limb_t a_len,
limb_t *b_tab, limb_t b_len, int mul_flags);
static void fft_clear_cache(bf_context_t *s);
#endif
#ifdef USE_BF_DEC
static limb_t get_digit(const limb_t *tab, limb_t len, slimb_t pos);
#endif
/* could leading zeros */
static inline int clz(limb_t a)
{
if (a == 0) {
return LIMB_BITS;
} else {
#if LIMB_BITS == 64
return clz64(a);
#else
return clz32(a);
#endif
}
}
static inline int ctz(limb_t a)
{
if (a == 0) {
return LIMB_BITS;
} else {
#if LIMB_BITS == 64
return ctz64(a);
#else
return ctz32(a);
#endif
}
}
static inline int ceil_log2(limb_t a)
{
if (a <= 1)
return 0;
else
return LIMB_BITS - clz(a - 1);
}
/* b must be >= 1 */
static inline slimb_t ceil_div(slimb_t a, slimb_t b)
{
if (a >= 0)
return (a + b - 1) / b;
else
return a / b;
}
/* b must be >= 1 */
static inline slimb_t floor_div(slimb_t a, slimb_t b)
{
if (a >= 0) {
return a / b;
} else {
return (a - b + 1) / b;
}
}
/* return r = a modulo b (0 <= r <= b - 1. b must be >= 1 */
static inline limb_t smod(slimb_t a, slimb_t b)
{
a = a % (slimb_t)b;
if (a < 0)
a += b;
return a;
}
/* signed addition with saturation */
static inline slimb_t sat_add(slimb_t a, slimb_t b)
{
slimb_t r;
r = a + b;
/* overflow ? */
if (((a ^ r) & (b ^ r)) < 0)
r = (a >> (LIMB_BITS - 1)) ^ (((limb_t)1 << (LIMB_BITS - 1)) - 1);
return r;
}
#define malloc(s) malloc_is_forbidden(s)
#define free(p) free_is_forbidden(p)
#define realloc(p, s) realloc_is_forbidden(p, s)
void bf_context_init(bf_context_t *s, bf_realloc_func_t *realloc_func,
void *realloc_opaque)
{
memset(s, 0, sizeof(*s));
s->realloc_func = realloc_func;
s->realloc_opaque = realloc_opaque;
}
void bf_context_end(bf_context_t *s)
{
bf_clear_cache(s);
}
void bf_init(bf_context_t *s, bf_t *r)
{
r->ctx = s;
r->sign = 0;
r->expn = BF_EXP_ZERO;
r->len = 0;
r->tab = NULL;
}
/* return 0 if OK, -1 if alloc error */
int bf_resize(bf_t *r, limb_t len)
{
limb_t *tab;
if (len != r->len) {
tab = bf_realloc(r->ctx, r->tab, len * sizeof(limb_t));
if (!tab && len != 0)
return -1;
r->tab = tab;
r->len = len;
}
return 0;
}
/* return 0 or BF_ST_MEM_ERROR */
int bf_set_ui(bf_t *r, uint64_t a)
{
r->sign = 0;
if (a == 0) {
r->expn = BF_EXP_ZERO;
bf_resize(r, 0); /* cannot fail */
}
#if LIMB_BITS == 32
else if (a <= 0xffffffff)
#else
else
#endif
{
int shift;
if (bf_resize(r, 1))
goto fail;
shift = clz(a);
r->tab[0] = a << shift;
r->expn = LIMB_BITS - shift;
}
#if LIMB_BITS == 32
else {
uint32_t a1, a0;
int shift;
if (bf_resize(r, 2))
goto fail;
a0 = a;
a1 = a >> 32;
shift = clz(a1);
r->tab[0] = a0 << shift;
r->tab[1] = (a1 << shift) | (a0 >> (LIMB_BITS - shift));
r->expn = 2 * LIMB_BITS - shift;
}
#endif
return 0;
fail:
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
/* return 0 or BF_ST_MEM_ERROR */
int bf_set_si(bf_t *r, int64_t a)
{
int ret;
if (a < 0) {
ret = bf_set_ui(r, -a);
r->sign = 1;
} else {
ret = bf_set_ui(r, a);
}
return ret;
}
void bf_set_nan(bf_t *r)
{
bf_resize(r, 0); /* cannot fail */
r->expn = BF_EXP_NAN;
r->sign = 0;
}
void bf_set_zero(bf_t *r, int is_neg)
{
bf_resize(r, 0); /* cannot fail */
r->expn = BF_EXP_ZERO;
r->sign = is_neg;
}
void bf_set_inf(bf_t *r, int is_neg)
{
bf_resize(r, 0); /* cannot fail */
r->expn = BF_EXP_INF;
r->sign = is_neg;
}
/* return 0 or BF_ST_MEM_ERROR */
int bf_set(bf_t *r, const bf_t *a)
{
if (r == a)
return 0;
if (bf_resize(r, a->len)) {
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
r->sign = a->sign;
r->expn = a->expn;
memcpy(r->tab, a->tab, a->len * sizeof(limb_t));
return 0;
}
/* equivalent to bf_set(r, a); bf_delete(a) */
void bf_move(bf_t *r, bf_t *a)
{
bf_context_t *s = r->ctx;
if (r == a)
return;
bf_free(s, r->tab);
*r = *a;
}
static limb_t get_limbz(const bf_t *a, limb_t idx)
{
if (idx >= a->len)
return 0;
else
return a->tab[idx];
}
/* get LIMB_BITS at bit position 'pos' in tab */
static inline limb_t get_bits(const limb_t *tab, limb_t len, slimb_t pos)
{
limb_t i, a0, a1;
int p;
i = pos >> LIMB_LOG2_BITS;
p = pos & (LIMB_BITS - 1);
if (i < len)
a0 = tab[i];
else
a0 = 0;
if (p == 0) {
return a0;
} else {
i++;
if (i < len)
a1 = tab[i];
else
a1 = 0;
return (a0 >> p) | (a1 << (LIMB_BITS - p));
}
}
static inline limb_t get_bit(const limb_t *tab, limb_t len, slimb_t pos)
{
slimb_t i;
i = pos >> LIMB_LOG2_BITS;
if (i < 0 || i >= len)
return 0;
return (tab[i] >> (pos & (LIMB_BITS - 1))) & 1;
}
static inline limb_t limb_mask(int start, int last)
{
limb_t v;
int n;
n = last - start + 1;
if (n == LIMB_BITS)
v = -1;
else
v = (((limb_t)1 << n) - 1) << start;
return v;
}
static limb_t mp_scan_nz(const limb_t *tab, mp_size_t n)
{
mp_size_t i;
for(i = 0; i < n; i++) {
if (tab[i] != 0)
return 1;
}
return 0;
}
/* return != 0 if one bit between 0 and bit_pos inclusive is not zero. */
static inline limb_t scan_bit_nz(const bf_t *r, slimb_t bit_pos)
{
slimb_t pos;
limb_t v;
pos = bit_pos >> LIMB_LOG2_BITS;
if (pos < 0)
return 0;
v = r->tab[pos] & limb_mask(0, bit_pos & (LIMB_BITS - 1));
if (v != 0)
return 1;
pos--;
while (pos >= 0) {
if (r->tab[pos] != 0)
return 1;
pos--;
}
return 0;
}
/* return the addend for rounding. Note that prec can be <= 0 (for
BF_FLAG_RADPNT_PREC) */
static int bf_get_rnd_add(int *pret, const bf_t *r, limb_t l,
slimb_t prec, int rnd_mode)
{
int add_one, inexact;
limb_t bit1, bit0;
if (rnd_mode == BF_RNDF) {
bit0 = 1; /* faithful rounding does not honor the INEXACT flag */
} else {
/* starting limb for bit 'prec + 1' */
bit0 = scan_bit_nz(r, l * LIMB_BITS - 1 - bf_max(0, prec + 1));
}
/* get the bit at 'prec' */
bit1 = get_bit(r->tab, l, l * LIMB_BITS - 1 - prec);
inexact = (bit1 | bit0) != 0;
add_one = 0;
switch(rnd_mode) {
case BF_RNDZ:
break;
case BF_RNDN:
if (bit1) {
if (bit0) {
add_one = 1;
} else {
/* round to even */
add_one =
get_bit(r->tab, l, l * LIMB_BITS - 1 - (prec - 1));
}
}
break;
case BF_RNDD:
case BF_RNDU:
if (r->sign == (rnd_mode == BF_RNDD))
add_one = inexact;
break;
case BF_RNDA:
add_one = inexact;
break;
case BF_RNDNA:
case BF_RNDF:
add_one = bit1;
break;
default:
abort();
}
if (inexact)
*pret |= BF_ST_INEXACT;
return add_one;
}
static int bf_set_overflow(bf_t *r, int sign, limb_t prec, bf_flags_t flags)
{
slimb_t i, l, e_max;
int rnd_mode;
rnd_mode = flags & BF_RND_MASK;
if (prec == BF_PREC_INF ||
rnd_mode == BF_RNDN ||
rnd_mode == BF_RNDNA ||
rnd_mode == BF_RNDA ||
(rnd_mode == BF_RNDD && sign == 1) ||
(rnd_mode == BF_RNDU && sign == 0)) {
bf_set_inf(r, sign);
} else {
/* set to maximum finite number */
l = (prec + LIMB_BITS - 1) / LIMB_BITS;
if (bf_resize(r, l)) {
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
r->tab[0] = limb_mask((-prec) & (LIMB_BITS - 1),
LIMB_BITS - 1);
for(i = 1; i < l; i++)
r->tab[i] = (limb_t)-1;
e_max = (limb_t)1 << (bf_get_exp_bits(flags) - 1);
r->expn = e_max;
r->sign = sign;
}
return BF_ST_OVERFLOW | BF_ST_INEXACT;
}
/* round to prec1 bits assuming 'r' is non zero and finite. 'r' is
assumed to have length 'l' (1 <= l <= r->len). Note: 'prec1' can be
infinite (BF_PREC_INF). 'ret' is 0 or BF_ST_INEXACT if the result
is known to be inexact. Can fail with BF_ST_MEM_ERROR in case of
overflow not returning infinity. */
static int __bf_round(bf_t *r, limb_t prec1, bf_flags_t flags, limb_t l,
int ret)
{
limb_t v, a;
int shift, add_one, rnd_mode;
slimb_t i, bit_pos, pos, e_min, e_max, e_range, prec;
/* e_min and e_max are computed to match the IEEE 754 conventions */
e_range = (limb_t)1 << (bf_get_exp_bits(flags) - 1);
e_min = -e_range + 3;
e_max = e_range;
if (flags & BF_FLAG_RADPNT_PREC) {
/* 'prec' is the precision after the radix point */
if (prec1 != BF_PREC_INF)
prec = r->expn + prec1;
else
prec = prec1;
} else if (unlikely(r->expn < e_min) && (flags & BF_FLAG_SUBNORMAL)) {
/* restrict the precision in case of potentially subnormal
result */
assert(prec1 != BF_PREC_INF);
prec = prec1 - (e_min - r->expn);
} else {
prec = prec1;
}
/* round to prec bits */
rnd_mode = flags & BF_RND_MASK;
add_one = bf_get_rnd_add(&ret, r, l, prec, rnd_mode);
if (prec <= 0) {
if (add_one) {
bf_resize(r, 1); /* cannot fail */
r->tab[0] = (limb_t)1 << (LIMB_BITS - 1);
r->expn += 1 - prec;
ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT;
return ret;
} else {
goto underflow;
}
} else if (add_one) {
limb_t carry;
/* add one starting at digit 'prec - 1' */
bit_pos = l * LIMB_BITS - 1 - (prec - 1);
pos = bit_pos >> LIMB_LOG2_BITS;
carry = (limb_t)1 << (bit_pos & (LIMB_BITS - 1));
for(i = pos; i < l; i++) {
v = r->tab[i] + carry;
carry = (v < carry);
r->tab[i] = v;
if (carry == 0)
break;
}
if (carry) {
/* shift right by one digit */
v = 1;
for(i = l - 1; i >= pos; i--) {
a = r->tab[i];
r->tab[i] = (a >> 1) | (v << (LIMB_BITS - 1));
v = a;
}
r->expn++;
}
}
/* check underflow */
if (unlikely(r->expn < e_min)) {
if (flags & BF_FLAG_SUBNORMAL) {
/* if inexact, also set the underflow flag */
if (ret & BF_ST_INEXACT)
ret |= BF_ST_UNDERFLOW;
} else {
underflow:
ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT;
bf_set_zero(r, r->sign);
return ret;
}
}
/* check overflow */
if (unlikely(r->expn > e_max))
return bf_set_overflow(r, r->sign, prec1, flags);
/* keep the bits starting at 'prec - 1' */
bit_pos = l * LIMB_BITS - 1 - (prec - 1);
i = bit_pos >> LIMB_LOG2_BITS;
if (i >= 0) {
shift = bit_pos & (LIMB_BITS - 1);
if (shift != 0)
r->tab[i] &= limb_mask(shift, LIMB_BITS - 1);
} else {
i = 0;
}
/* remove trailing zeros */
while (r->tab[i] == 0)
i++;
if (i > 0) {
l -= i;
memmove(r->tab, r->tab + i, l * sizeof(limb_t));
}
bf_resize(r, l); /* cannot fail */
return ret;
}
/* 'r' must be a finite number. */
int bf_normalize_and_round(bf_t *r, limb_t prec1, bf_flags_t flags)
{
limb_t l, v, a;
int shift, ret;
slimb_t i;
// bf_print_str("bf_renorm", r);
l = r->len;
while (l > 0 && r->tab[l - 1] == 0)
l--;
if (l == 0) {
/* zero */
r->expn = BF_EXP_ZERO;
bf_resize(r, 0); /* cannot fail */
ret = 0;
} else {
r->expn -= (r->len - l) * LIMB_BITS;
/* shift to have the MSB set to '1' */
v = r->tab[l - 1];
shift = clz(v);
if (shift != 0) {
v = 0;
for(i = 0; i < l; i++) {
a = r->tab[i];
r->tab[i] = (a << shift) | (v >> (LIMB_BITS - shift));
v = a;
}
r->expn -= shift;
}
ret = __bf_round(r, prec1, flags, l, 0);
}
// bf_print_str("r_final", r);
return ret;
}
/* return true if rounding can be done at precision 'prec' assuming
the exact result r is such that |r-a| <= 2^(EXP(a)-k). */
/* XXX: check the case where the exponent would be incremented by the
rounding */
int bf_can_round(const bf_t *a, slimb_t prec, bf_rnd_t rnd_mode, slimb_t k)
{
BOOL is_rndn;
slimb_t bit_pos, n;
limb_t bit;
if (a->expn == BF_EXP_INF || a->expn == BF_EXP_NAN)
return FALSE;
if (rnd_mode == BF_RNDF) {
return (k >= (prec + 1));
}
if (a->expn == BF_EXP_ZERO)
return FALSE;
is_rndn = (rnd_mode == BF_RNDN || rnd_mode == BF_RNDNA);
if (k < (prec + 2))
return FALSE;
bit_pos = a->len * LIMB_BITS - 1 - prec;
n = k - prec;
/* bit pattern for RNDN or RNDNA: 0111.. or 1000...
for other rounding modes: 000... or 111...
*/
bit = get_bit(a->tab, a->len, bit_pos);
bit_pos--;
n--;
bit ^= is_rndn;
/* XXX: slow, but a few iterations on average */
while (n != 0) {
if (get_bit(a->tab, a->len, bit_pos) != bit)
return TRUE;
bit_pos--;
n--;
}
return FALSE;
}
/* Cannot fail with BF_ST_MEM_ERROR. */
int bf_round(bf_t *r, limb_t prec, bf_flags_t flags)
{
if (r->len == 0)
return 0;
return __bf_round(r, prec, flags, r->len, 0);
}
/* for debugging */
static __maybe_unused void dump_limbs(const char *str, const limb_t *tab, limb_t n)
{
limb_t i;
printf("%s: len=%" PRId_LIMB "\n", str, n);
for(i = 0; i < n; i++) {
printf("%" PRId_LIMB ": " FMT_LIMB "\n",
i, tab[i]);
}
}
void mp_print_str(const char *str, const limb_t *tab, limb_t n)
{
slimb_t i;
printf("%s= 0x", str);
for(i = n - 1; i >= 0; i--) {
if (i != (n - 1))
printf("_");
printf(FMT_LIMB, tab[i]);
}
printf("\n");
}
static __maybe_unused void mp_print_str_h(const char *str,
const limb_t *tab, limb_t n,
limb_t high)
{
slimb_t i;
printf("%s= 0x", str);
printf(FMT_LIMB, high);
for(i = n - 1; i >= 0; i--) {
printf("_");
printf(FMT_LIMB, tab[i]);
}
printf("\n");
}
/* for debugging */
void bf_print_str(const char *str, const bf_t *a)
{
slimb_t i;
printf("%s=", str);
if (a->expn == BF_EXP_NAN) {
printf("NaN");
} else {
if (a->sign)
putchar('-');
if (a->expn == BF_EXP_ZERO) {
putchar('0');
} else if (a->expn == BF_EXP_INF) {
printf("Inf");
} else {
printf("0x0.");
for(i = a->len - 1; i >= 0; i--)
printf(FMT_LIMB, a->tab[i]);
printf("p%" PRId_LIMB, a->expn);
}
}
printf("\n");
}
/* compare the absolute value of 'a' and 'b'. Return < 0 if a < b, 0
if a = b and > 0 otherwise. */
int bf_cmpu(const bf_t *a, const bf_t *b)
{
slimb_t i;
limb_t len, v1, v2;
if (a->expn != b->expn) {
if (a->expn < b->expn)
return -1;
else
return 1;
}
len = bf_max(a->len, b->len);
for(i = len - 1; i >= 0; i--) {
v1 = get_limbz(a, a->len - len + i);
v2 = get_limbz(b, b->len - len + i);
if (v1 != v2) {
if (v1 < v2)
return -1;
else
return 1;
}
}
return 0;
}
/* Full order: -0 < 0, NaN == NaN and NaN is larger than all other numbers */
int bf_cmp_full(const bf_t *a, const bf_t *b)
{
int res;
if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) {
if (a->expn == b->expn)
res = 0;
else if (a->expn == BF_EXP_NAN)
res = 1;
else
res = -1;
} else if (a->sign != b->sign) {
res = 1 - 2 * a->sign;
} else {
res = bf_cmpu(a, b);
if (a->sign)
res = -res;
}
return res;
}
/* Standard floating point comparison: return 2 if one of the operands
is NaN (unordered) or -1, 0, 1 depending on the ordering assuming
-0 == +0 */
int bf_cmp(const bf_t *a, const bf_t *b)
{
int res;
if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) {
res = 2;
} else if (a->sign != b->sign) {
if (a->expn == BF_EXP_ZERO && b->expn == BF_EXP_ZERO)
res = 0;
else
res = 1 - 2 * a->sign;
} else {
res = bf_cmpu(a, b);
if (a->sign)
res = -res;
}
return res;
}
/* Compute the number of bits 'n' matching the pattern:
a= X1000..0
b= X0111..1
When computing a-b, the result will have at least n leading zero
bits.
Precondition: a > b and a.expn - b.expn = 0 or 1
*/
static limb_t count_cancelled_bits(const bf_t *a, const bf_t *b)
{
slimb_t bit_offset, b_offset, n;
int p, p1;
limb_t v1, v2, mask;
bit_offset = a->len * LIMB_BITS - 1;
b_offset = (b->len - a->len) * LIMB_BITS - (LIMB_BITS - 1) +
a->expn - b->expn;
n = 0;
/* first search the equals bits */
for(;;) {
v1 = get_limbz(a, bit_offset >> LIMB_LOG2_BITS);
v2 = get_bits(b->tab, b->len, bit_offset + b_offset);
// printf("v1=" FMT_LIMB " v2=" FMT_LIMB "\n", v1, v2);
if (v1 != v2)
break;
n += LIMB_BITS;
bit_offset -= LIMB_BITS;
}
/* find the position of the first different bit */
p = clz(v1 ^ v2) + 1;
n += p;
/* then search for '0' in a and '1' in b */
p = LIMB_BITS - p;
if (p > 0) {
/* search in the trailing p bits of v1 and v2 */
mask = limb_mask(0, p - 1);
p1 = bf_min(clz(v1 & mask), clz((~v2) & mask)) - (LIMB_BITS - p);
n += p1;
if (p1 != p)
goto done;
}
bit_offset -= LIMB_BITS;
for(;;) {
v1 = get_limbz(a, bit_offset >> LIMB_LOG2_BITS);
v2 = get_bits(b->tab, b->len, bit_offset + b_offset);
// printf("v1=" FMT_LIMB " v2=" FMT_LIMB "\n", v1, v2);
if (v1 != 0 || v2 != -1) {
/* different: count the matching bits */
p1 = bf_min(clz(v1), clz(~v2));
n += p1;
break;
}
n += LIMB_BITS;
bit_offset -= LIMB_BITS;
}
done:
return n;
}
static int bf_add_internal(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags, int b_neg)
{
const bf_t *tmp;
int is_sub, ret, cmp_res, a_sign, b_sign;
a_sign = a->sign;
b_sign = b->sign ^ b_neg;
is_sub = a_sign ^ b_sign;
cmp_res = bf_cmpu(a, b);
if (cmp_res < 0) {
tmp = a;
a = b;
b = tmp;
a_sign = b_sign; /* b_sign is never used later */
}
/* abs(a) >= abs(b) */
if (cmp_res == 0 && is_sub && a->expn < BF_EXP_INF) {
/* zero result */
bf_set_zero(r, (flags & BF_RND_MASK) == BF_RNDD);
ret = 0;
} else if (a->len == 0 || b->len == 0) {
ret = 0;
if (a->expn >= BF_EXP_INF) {
if (a->expn == BF_EXP_NAN) {
/* at least one operand is NaN */
bf_set_nan(r);
} else if (b->expn == BF_EXP_INF && is_sub) {
/* infinities with different signs */
bf_set_nan(r);
ret = BF_ST_INVALID_OP;
} else {
bf_set_inf(r, a_sign);
}
} else {
/* at least one zero and not subtract */
bf_set(r, a);
r->sign = a_sign;
goto renorm;
}
} else {
slimb_t d, a_offset, b_bit_offset, i, cancelled_bits;
limb_t carry, v1, v2, u, r_len, carry1, precl, tot_len, z, sub_mask;
r->sign = a_sign;
r->expn = a->expn;
d = a->expn - b->expn;
/* must add more precision for the leading cancelled bits in
subtraction */
if (is_sub) {
if (d <= 1)
cancelled_bits = count_cancelled_bits(a, b);
else
cancelled_bits = 1;
} else {
cancelled_bits = 0;
}
/* add two extra bits for rounding */
precl = (cancelled_bits + prec + 2 + LIMB_BITS - 1) / LIMB_BITS;
tot_len = bf_max(a->len, b->len + (d + LIMB_BITS - 1) / LIMB_BITS);
r_len = bf_min(precl, tot_len);
if (bf_resize(r, r_len))
goto fail;
a_offset = a->len - r_len;
b_bit_offset = (b->len - r_len) * LIMB_BITS + d;
/* compute the bits before for the rounding */
carry = is_sub;
z = 0;
sub_mask = -is_sub;
i = r_len - tot_len;
while (i < 0) {
slimb_t ap, bp;
BOOL inflag;
ap = a_offset + i;
bp = b_bit_offset + i * LIMB_BITS;
inflag = FALSE;
if (ap >= 0 && ap < a->len) {
v1 = a->tab[ap];
inflag = TRUE;
} else {
v1 = 0;
}
if (bp + LIMB_BITS > 0 && bp < (slimb_t)(b->len * LIMB_BITS)) {
v2 = get_bits(b->tab, b->len, bp);
inflag = TRUE;
} else {
v2 = 0;
}
if (!inflag) {
/* outside 'a' and 'b': go directly to the next value
inside a or b so that the running time does not
depend on the exponent difference */
i = 0;
if (ap < 0)
i = bf_min(i, -a_offset);
/* b_bit_offset + i * LIMB_BITS + LIMB_BITS >= 1
equivalent to
i >= ceil(-b_bit_offset + 1 - LIMB_BITS) / LIMB_BITS)
*/
if (bp + LIMB_BITS <= 0)
i = bf_min(i, (-b_bit_offset) >> LIMB_LOG2_BITS);
} else {
i++;
}
v2 ^= sub_mask;
u = v1 + v2;
carry1 = u < v1;
u += carry;
carry = (u < carry) | carry1;
z |= u;
}
/* and the result */
for(i = 0; i < r_len; i++) {
v1 = get_limbz(a, a_offset + i);
v2 = get_bits(b->tab, b->len, b_bit_offset + i * LIMB_BITS);
v2 ^= sub_mask;
u = v1 + v2;
carry1 = u < v1;
u += carry;
carry = (u < carry) | carry1;
r->tab[i] = u;
}
/* set the extra bits for the rounding */
r->tab[0] |= (z != 0);
/* carry is only possible in add case */
if (!is_sub && carry) {
if (bf_resize(r, r_len + 1))
goto fail;
r->tab[r_len] = 1;
r->expn += LIMB_BITS;
}
renorm:
ret = bf_normalize_and_round(r, prec, flags);
}
return ret;
fail:
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
static int __bf_add(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags)
{
return bf_add_internal(r, a, b, prec, flags, 0);
}
static int __bf_sub(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags)
{
return bf_add_internal(r, a, b, prec, flags, 1);
}
limb_t mp_add(limb_t *res, const limb_t *op1, const limb_t *op2,
limb_t n, limb_t carry)
{
slimb_t i;
limb_t k, a, v, k1;
k = carry;
for(i=0;i<n;i++) {
v = op1[i];
a = v + op2[i];
k1 = a < v;
a = a + k;
k = (a < k) | k1;
res[i] = a;
}
return k;
}
limb_t mp_add_ui(limb_t *tab, limb_t b, size_t n)
{
size_t i;
limb_t k, a;
k=b;
for(i=0;i<n;i++) {
if (k == 0)
break;
a = tab[i] + k;
k = (a < k);
tab[i] = a;
}
return k;
}
limb_t mp_sub(limb_t *res, const limb_t *op1, const limb_t *op2,
mp_size_t n, limb_t carry)
{
int i;
limb_t k, a, v, k1;
k = carry;
for(i=0;i<n;i++) {
v = op1[i];
a = v - op2[i];
k1 = a > v;
v = a - k;
k = (v > a) | k1;
res[i] = v;
}
return k;
}
/* compute 0 - op2 */
static limb_t mp_neg(limb_t *res, const limb_t *op2, mp_size_t n, limb_t carry)
{
int i;
limb_t k, a, v, k1;
k = carry;
for(i=0;i<n;i++) {
v = 0;
a = v - op2[i];
k1 = a > v;
v = a - k;
k = (v > a) | k1;
res[i] = v;
}
return k;
}
limb_t mp_sub_ui(limb_t *tab, limb_t b, mp_size_t n)
{
mp_size_t i;
limb_t k, a, v;
k=b;
for(i=0;i<n;i++) {
v = tab[i];
a = v - k;
k = a > v;
tab[i] = a;
if (k == 0)
break;
}
return k;
}
/* r = (a + high*B^n) >> shift. Return the remainder r (0 <= r < 2^shift).
1 <= shift <= LIMB_BITS - 1 */
static limb_t mp_shr(limb_t *tab_r, const limb_t *tab, mp_size_t n,
int shift, limb_t high)
{
mp_size_t i;
limb_t l, a;
assert(shift >= 1 && shift < LIMB_BITS);
l = high;
for(i = n - 1; i >= 0; i--) {
a = tab[i];
tab_r[i] = (a >> shift) | (l << (LIMB_BITS - shift));
l = a;
}
return l & (((limb_t)1 << shift) - 1);
}
/* tabr[] = taba[] * b + l. Return the high carry */
static limb_t mp_mul1(limb_t *tabr, const limb_t *taba, limb_t n,
limb_t b, limb_t l)
{
limb_t i;
dlimb_t t;
for(i = 0; i < n; i++) {
t = (dlimb_t)taba[i] * (dlimb_t)b + l;
tabr[i] = t;
l = t >> LIMB_BITS;
}
return l;
}
/* tabr[] += taba[] * b, return the high word. */
static limb_t mp_add_mul1(limb_t *tabr, const limb_t *taba, limb_t n,
limb_t b)
{
limb_t i, l;
dlimb_t t;
l = 0;
for(i = 0; i < n; i++) {
t = (dlimb_t)taba[i] * (dlimb_t)b + l + tabr[i];
tabr[i] = t;
l = t >> LIMB_BITS;
}
return l;
}
/* size of the result : op1_size + op2_size. */
static void mp_mul_basecase(limb_t *result,
const limb_t *op1, limb_t op1_size,
const limb_t *op2, limb_t op2_size)
{
limb_t i, r;
result[op1_size] = mp_mul1(result, op1, op1_size, op2[0], 0);
for(i=1;i<op2_size;i++) {
r = mp_add_mul1(result + i, op1, op1_size, op2[i]);
result[i + op1_size] = r;
}
}
/* return 0 if OK, -1 if memory error */
/* XXX: change API so that result can be allocated */
int mp_mul(bf_context_t *s, limb_t *result,
const limb_t *op1, limb_t op1_size,
const limb_t *op2, limb_t op2_size)
{
#ifdef USE_FFT_MUL
if (unlikely(bf_min(op1_size, op2_size) >= FFT_MUL_THRESHOLD)) {
bf_t r_s, *r = &r_s;
r->tab = result;
/* XXX: optimize memory usage in API */
if (fft_mul(s, r, (limb_t *)op1, op1_size,
(limb_t *)op2, op2_size, FFT_MUL_R_NORESIZE))
return -1;
} else
#endif
{
mp_mul_basecase(result, op1, op1_size, op2, op2_size);
}
return 0;
}
/* tabr[] -= taba[] * b. Return the value to substract to the high
word. */
static limb_t mp_sub_mul1(limb_t *tabr, const limb_t *taba, limb_t n,
limb_t b)
{
limb_t i, l;
dlimb_t t;
l = 0;
for(i = 0; i < n; i++) {
t = tabr[i] - (dlimb_t)taba[i] * (dlimb_t)b - l;
tabr[i] = t;
l = -(t >> LIMB_BITS);
}
return l;
}
/* WARNING: d must be >= 2^(LIMB_BITS-1) */
static inline limb_t udiv1norm_init(limb_t d)
{
limb_t a0, a1;
a1 = -d - 1;
a0 = -1;
return (((dlimb_t)a1 << LIMB_BITS) | a0) / d;
}
/* return the quotient and the remainder in '*pr'of 'a1*2^LIMB_BITS+a0
/ d' with 0 <= a1 < d. */
static inline limb_t udiv1norm(limb_t *pr, limb_t a1, limb_t a0,
limb_t d, limb_t d_inv)
{
limb_t n1m, n_adj, q, r, ah;
dlimb_t a;
n1m = ((slimb_t)a0 >> (LIMB_BITS - 1));
n_adj = a0 + (n1m & d);
a = (dlimb_t)d_inv * (a1 - n1m) + n_adj;
q = (a >> LIMB_BITS) + a1;
/* compute a - q * r and update q so that the remainder is\
between 0 and d - 1 */
a = ((dlimb_t)a1 << LIMB_BITS) | a0;
a = a - (dlimb_t)q * d - d;
ah = a >> LIMB_BITS;
q += 1 + ah;
r = (limb_t)a + (ah & d);
*pr = r;
return q;
}
/* b must be >= 1 << (LIMB_BITS - 1) */
static limb_t mp_div1norm(limb_t *tabr, const limb_t *taba, limb_t n,
limb_t b, limb_t r)
{
slimb_t i;
if (n >= UDIV1NORM_THRESHOLD) {
limb_t b_inv;
b_inv = udiv1norm_init(b);
for(i = n - 1; i >= 0; i--) {
tabr[i] = udiv1norm(&r, r, taba[i], b, b_inv);
}
} else {
dlimb_t a1;
for(i = n - 1; i >= 0; i--) {
a1 = ((dlimb_t)r << LIMB_BITS) | taba[i];
tabr[i] = a1 / b;
r = a1 % b;
}
}
return r;
}
static int mp_divnorm_large(bf_context_t *s,
limb_t *tabq, limb_t *taba, limb_t na,
const limb_t *tabb, limb_t nb);
/* base case division: divides taba[0..na-1] by tabb[0..nb-1]. tabb[nb
- 1] must be >= 1 << (LIMB_BITS - 1). na - nb must be >= 0. 'taba'
is modified and contains the remainder (nb limbs). tabq[0..na-nb]
contains the quotient with tabq[na - nb] <= 1. */
static int mp_divnorm(bf_context_t *s, limb_t *tabq, limb_t *taba, limb_t na,
const limb_t *tabb, limb_t nb)
{
limb_t r, a, c, q, v, b1, b1_inv, n, dummy_r;
slimb_t i, j;
b1 = tabb[nb - 1];
if (nb == 1) {
taba[0] = mp_div1norm(tabq, taba, na, b1, 0);
return 0;
}
n = na - nb;
if (bf_min(n, nb) >= DIVNORM_LARGE_THRESHOLD) {
return mp_divnorm_large(s, tabq, taba, na, tabb, nb);
}
if (n >= UDIV1NORM_THRESHOLD)
b1_inv = udiv1norm_init(b1);
else
b1_inv = 0;
/* first iteration: the quotient is only 0 or 1 */
q = 1;
for(j = nb - 1; j >= 0; j--) {
if (taba[n + j] != tabb[j]) {
if (taba[n + j] < tabb[j])
q = 0;
break;
}
}
tabq[n] = q;
if (q) {
mp_sub(taba + n, taba + n, tabb, nb, 0);
}
for(i = n - 1; i >= 0; i--) {
if (unlikely(taba[i + nb] >= b1)) {
q = -1;
} else if (b1_inv) {
q = udiv1norm(&dummy_r, taba[i + nb], taba[i + nb - 1], b1, b1_inv);
} else {
dlimb_t al;
al = ((dlimb_t)taba[i + nb] << LIMB_BITS) | taba[i + nb - 1];
q = al / b1;
r = al % b1;
}
r = mp_sub_mul1(taba + i, tabb, nb, q);
v = taba[i + nb];
a = v - r;
c = (a > v);
taba[i + nb] = a;
if (c != 0) {
/* negative result */
for(;;) {
q--;
c = mp_add(taba + i, taba + i, tabb, nb, 0);
/* propagate carry and test if positive result */
if (c != 0) {
if (++taba[i + nb] == 0) {
break;
}
}
}
}
tabq[i] = q;
}
return 0;
}
/* compute r=B^(2*n)/a such as a*r < B^(2*n) < a*r + 2 with n >= 1. 'a'
has n limbs with a[n-1] >= B/2 and 'r' has n+1 limbs with r[n] = 1.
See Modern Computer Arithmetic by Richard P. Brent and Paul
Zimmermann, algorithm 3.5 */
int mp_recip(bf_context_t *s, limb_t *tabr, const limb_t *taba, limb_t n)
{
mp_size_t l, h, k, i;
limb_t *tabxh, *tabt, c, *tabu;
if (n <= 2) {
/* return ceil(B^(2*n)/a) - 1 */
/* XXX: could avoid allocation */
tabu = bf_malloc(s, sizeof(limb_t) * (2 * n + 1));
tabt = bf_malloc(s, sizeof(limb_t) * (n + 2));
if (!tabt || !tabu)
goto fail;
for(i = 0; i < 2 * n; i++)
tabu[i] = 0;
tabu[2 * n] = 1;
if (mp_divnorm(s, tabt, tabu, 2 * n + 1, taba, n))
goto fail;
for(i = 0; i < n + 1; i++)
tabr[i] = tabt[i];
if (mp_scan_nz(tabu, n) == 0) {
/* only happens for a=B^n/2 */
mp_sub_ui(tabr, 1, n + 1);
}
} else {
l = (n - 1) / 2;
h = n - l;
/* n=2p -> l=p-1, h = p + 1, k = p + 3
n=2p+1-> l=p, h = p + 1; k = p + 2
*/
tabt = bf_malloc(s, sizeof(limb_t) * (n + h + 1));
tabu = bf_malloc(s, sizeof(limb_t) * (n + 2 * h - l + 2));
if (!tabt || !tabu)
goto fail;
tabxh = tabr + l;
if (mp_recip(s, tabxh, taba + l, h))
goto fail;
if (mp_mul(s, tabt, taba, n, tabxh, h + 1)) /* n + h + 1 limbs */
goto fail;
while (tabt[n + h] != 0) {
mp_sub_ui(tabxh, 1, h + 1);
c = mp_sub(tabt, tabt, taba, n, 0);
mp_sub_ui(tabt + n, c, h + 1);
}
/* T = B^(n+h) - T */
mp_neg(tabt, tabt, n + h + 1, 0);
tabt[n + h]++;
if (mp_mul(s, tabu, tabt + l, n + h + 1 - l, tabxh, h + 1))
goto fail;
/* n + 2*h - l + 2 limbs */
k = 2 * h - l;
for(i = 0; i < l; i++)
tabr[i] = tabu[i + k];
mp_add(tabr + l, tabr + l, tabu + 2 * h, h, 0);
}
bf_free(s, tabt);
bf_free(s, tabu);
return 0;
fail:
bf_free(s, tabt);
bf_free(s, tabu);
return -1;
}
/* return -1, 0 or 1 */
static int mp_cmp(const limb_t *taba, const limb_t *tabb, mp_size_t n)
{
mp_size_t i;
for(i = n - 1; i >= 0; i--) {
if (taba[i] != tabb[i]) {
if (taba[i] < tabb[i])
return -1;
else
return 1;
}
}
return 0;
}
//#define DEBUG_DIVNORM_LARGE
//#define DEBUG_DIVNORM_LARGE2
/* subquadratic divnorm */
static int mp_divnorm_large(bf_context_t *s,
limb_t *tabq, limb_t *taba, limb_t na,
const limb_t *tabb, limb_t nb)
{
limb_t *tabb_inv, nq, *tabt, i, n;
nq = na - nb;
#ifdef DEBUG_DIVNORM_LARGE
printf("na=%d nb=%d nq=%d\n", (int)na, (int)nb, (int)nq);
mp_print_str("a", taba, na);
mp_print_str("b", tabb, nb);
#endif
assert(nq >= 1);
n = nq;
if (nq < nb)
n++;
tabb_inv = bf_malloc(s, sizeof(limb_t) * (n + 1));
tabt = bf_malloc(s, sizeof(limb_t) * 2 * (n + 1));
if (!tabb_inv || !tabt)
goto fail;
if (n >= nb) {
for(i = 0; i < n - nb; i++)
tabt[i] = 0;
for(i = 0; i < nb; i++)
tabt[i + n - nb] = tabb[i];
} else {
/* truncate B: need to increment it so that the approximate
inverse is smaller that the exact inverse */
for(i = 0; i < n; i++)
tabt[i] = tabb[i + nb - n];
if (mp_add_ui(tabt, 1, n)) {
/* tabt = B^n : tabb_inv = B^n */
memset(tabb_inv, 0, n * sizeof(limb_t));
tabb_inv[n] = 1;
goto recip_done;
}
}
if (mp_recip(s, tabb_inv, tabt, n))
goto fail;
recip_done:
/* Q=A*B^-1 */
if (mp_mul(s, tabt, tabb_inv, n + 1, taba + na - (n + 1), n + 1))
goto fail;
for(i = 0; i < nq + 1; i++)
tabq[i] = tabt[i + 2 * (n + 1) - (nq + 1)];
#ifdef DEBUG_DIVNORM_LARGE
mp_print_str("q", tabq, nq + 1);
#endif
bf_free(s, tabt);
bf_free(s, tabb_inv);
tabb_inv = NULL;
/* R=A-B*Q */
tabt = bf_malloc(s, sizeof(limb_t) * (na + 1));
if (!tabt)
goto fail;
if (mp_mul(s, tabt, tabq, nq + 1, tabb, nb))
goto fail;
/* we add one more limb for the result */
mp_sub(taba, taba, tabt, nb + 1, 0);
bf_free(s, tabt);
/* the approximated quotient is smaller than than the exact one,
hence we may have to increment it */
#ifdef DEBUG_DIVNORM_LARGE2
int cnt = 0;
static int cnt_max;
#endif
for(;;) {
if (taba[nb] == 0 && mp_cmp(taba, tabb, nb) < 0)
break;
taba[nb] -= mp_sub(taba, taba, tabb, nb, 0);
mp_add_ui(tabq, 1, nq + 1);
#ifdef DEBUG_DIVNORM_LARGE2
cnt++;
#endif
}
#ifdef DEBUG_DIVNORM_LARGE2
if (cnt > cnt_max) {
cnt_max = cnt;
printf("\ncnt=%d nq=%d nb=%d\n", cnt_max, (int)nq, (int)nb);
}
#endif
return 0;
fail:
bf_free(s, tabb_inv);
bf_free(s, tabt);
return -1;
}
int bf_mul(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags)
{
int ret, r_sign;
if (a->len < b->len) {
const bf_t *tmp = a;
a = b;
b = tmp;
}
r_sign = a->sign ^ b->sign;
/* here b->len <= a->len */
if (b->len == 0) {
if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) {
bf_set_nan(r);
ret = 0;
} else if (a->expn == BF_EXP_INF || b->expn == BF_EXP_INF) {
if ((a->expn == BF_EXP_INF && b->expn == BF_EXP_ZERO) ||
(a->expn == BF_EXP_ZERO && b->expn == BF_EXP_INF)) {
bf_set_nan(r);
ret = BF_ST_INVALID_OP;
} else {
bf_set_inf(r, r_sign);
ret = 0;
}
} else {
bf_set_zero(r, r_sign);
ret = 0;
}
} else {
bf_t tmp, *r1 = NULL;
limb_t a_len, b_len, precl;
limb_t *a_tab, *b_tab;
a_len = a->len;
b_len = b->len;
if ((flags & BF_RND_MASK) == BF_RNDF) {
/* faithful rounding does not require using the full inputs */
precl = (prec + 2 + LIMB_BITS - 1) / LIMB_BITS;
a_len = bf_min(a_len, precl);
b_len = bf_min(b_len, precl);
}
a_tab = a->tab + a->len - a_len;
b_tab = b->tab + b->len - b_len;
#ifdef USE_FFT_MUL
if (b_len >= FFT_MUL_THRESHOLD) {
int mul_flags = 0;
if (r == a)
mul_flags |= FFT_MUL_R_OVERLAP_A;
if (r == b)
mul_flags |= FFT_MUL_R_OVERLAP_B;
if (fft_mul(r->ctx, r, a_tab, a_len, b_tab, b_len, mul_flags))
goto fail;
} else
#endif
{
if (r == a || r == b) {
bf_init(r->ctx, &tmp);
r1 = r;
r = &tmp;
}
if (bf_resize(r, a_len + b_len)) {
fail:
bf_set_nan(r);
ret = BF_ST_MEM_ERROR;
goto done;
}
mp_mul_basecase(r->tab, a_tab, a_len, b_tab, b_len);
}
r->sign = r_sign;
r->expn = a->expn + b->expn;
ret = bf_normalize_and_round(r, prec, flags);
done:
if (r == &tmp)
bf_move(r1, &tmp);
}
return ret;
}
/* multiply 'r' by 2^e */
int bf_mul_2exp(bf_t *r, slimb_t e, limb_t prec, bf_flags_t flags)
{
slimb_t e_max;
if (r->len == 0)
return 0;
e_max = ((limb_t)1 << BF_EXT_EXP_BITS_MAX) - 1;
e = bf_max(e, -e_max);
e = bf_min(e, e_max);
r->expn += e;
return __bf_round(r, prec, flags, r->len, 0);
}
/* Return e such as a=m*2^e with m odd integer. return 0 if a is zero,
Infinite or Nan. */
slimb_t bf_get_exp_min(const bf_t *a)
{
slimb_t i;
limb_t v;
int k;
for(i = 0; i < a->len; i++) {
v = a->tab[i];
if (v != 0) {
k = ctz(v);
return a->expn - (a->len - i) * LIMB_BITS + k;
}
}
return 0;
}
/* a and b must be finite numbers with a >= 0 and b > 0. 'q' is the
integer defined as floor(a/b) and r = a - q * b. */
static void bf_tdivremu(bf_t *q, bf_t *r,
const bf_t *a, const bf_t *b)
{
if (bf_cmpu(a, b) < 0) {
bf_set_ui(q, 0);
bf_set(r, a);
} else {
bf_div(q, a, b, bf_max(a->expn - b->expn + 1, 2), BF_RNDZ);
bf_rint(q, BF_RNDZ);
bf_mul(r, q, b, BF_PREC_INF, BF_RNDZ);
bf_sub(r, a, r, BF_PREC_INF, BF_RNDZ);
}
}
static int __bf_div(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags)
{
bf_context_t *s = r->ctx;
int ret, r_sign;
limb_t n, nb, precl;
r_sign = a->sign ^ b->sign;
if (a->expn >= BF_EXP_INF || b->expn >= BF_EXP_INF) {
if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) {
bf_set_nan(r);
return 0;
} else if (a->expn == BF_EXP_INF && b->expn == BF_EXP_INF) {
bf_set_nan(r);
return BF_ST_INVALID_OP;
} else if (a->expn == BF_EXP_INF) {
bf_set_inf(r, r_sign);
return 0;
} else {
bf_set_zero(r, r_sign);
return 0;
}
} else if (a->expn == BF_EXP_ZERO) {
if (b->expn == BF_EXP_ZERO) {
bf_set_nan(r);
return BF_ST_INVALID_OP;
} else {
bf_set_zero(r, r_sign);
return 0;
}
} else if (b->expn == BF_EXP_ZERO) {
bf_set_inf(r, r_sign);
return BF_ST_DIVIDE_ZERO;
}
/* number of limbs of the quotient (2 extra bits for rounding) */
precl = (prec + 2 + LIMB_BITS - 1) / LIMB_BITS;
nb = b->len;
n = bf_max(a->len, precl);
{
limb_t *taba, na;
slimb_t d;
na = n + nb;
taba = bf_malloc(s, (na + 1) * sizeof(limb_t));
if (!taba)
goto fail;
d = na - a->len;
memset(taba, 0, d * sizeof(limb_t));
memcpy(taba + d, a->tab, a->len * sizeof(limb_t));
if (bf_resize(r, n + 1))
goto fail1;
if (mp_divnorm(s, r->tab, taba, na, b->tab, nb)) {
fail1:
bf_free(s, taba);
goto fail;
}
/* see if non zero remainder */
if (mp_scan_nz(taba, nb))
r->tab[0] |= 1;
bf_free(r->ctx, taba);
r->expn = a->expn - b->expn + LIMB_BITS;
r->sign = r_sign;
ret = bf_normalize_and_round(r, prec, flags);
}
return ret;
fail:
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
/* division and remainder.
rnd_mode is the rounding mode for the quotient. The additional
rounding mode BF_RND_EUCLIDIAN is supported.
'q' is an integer. 'r' is rounded with prec and flags (prec can be
BF_PREC_INF).
*/
int bf_divrem(bf_t *q, bf_t *r, const bf_t *a, const bf_t *b,
limb_t prec, bf_flags_t flags, int rnd_mode)
{
bf_t a1_s, *a1 = &a1_s;
bf_t b1_s, *b1 = &b1_s;
int q_sign, ret;
BOOL is_ceil, is_rndn;
assert(q != a && q != b);
assert(r != a && r != b);
assert(q != r);
if (a->len == 0 || b->len == 0) {
bf_set_zero(q, 0);
if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) {
bf_set_nan(r);
return 0;
} else if (a->expn == BF_EXP_INF || b->expn == BF_EXP_ZERO) {
bf_set_nan(r);
return BF_ST_INVALID_OP;
} else {
bf_set(r, a);
return bf_round(r, prec, flags);
}
}
q_sign = a->sign ^ b->sign;
is_rndn = (rnd_mode == BF_RNDN || rnd_mode == BF_RNDNA);
switch(rnd_mode) {
default:
case BF_RNDZ:
case BF_RNDN:
case BF_RNDNA:
is_ceil = FALSE;
break;
case BF_RNDD:
is_ceil = q_sign;
break;
case BF_RNDU:
is_ceil = q_sign ^ 1;
break;
case BF_RNDA:
is_ceil = TRUE;
break;
case BF_DIVREM_EUCLIDIAN:
is_ceil = a->sign;
break;
}
a1->expn = a->expn;
a1->tab = a->tab;
a1->len = a->len;
a1->sign = 0;
b1->expn = b->expn;
b1->tab = b->tab;
b1->len = b->len;
b1->sign = 0;
/* XXX: could improve to avoid having a large 'q' */
bf_tdivremu(q, r, a1, b1);
if (bf_is_nan(q) || bf_is_nan(r))
goto fail;
if (r->len != 0) {
if (is_rndn) {
int res;
b1->expn--;
res = bf_cmpu(r, b1);
b1->expn++;
if (res > 0 ||
(res == 0 &&
(rnd_mode == BF_RNDNA ||
get_bit(q->tab, q->len, q->len * LIMB_BITS - q->expn)))) {
goto do_sub_r;
}
} else if (is_ceil) {
do_sub_r:
ret = bf_add_si(q, q, 1, BF_PREC_INF, BF_RNDZ);
ret |= bf_sub(r, r, b1, BF_PREC_INF, BF_RNDZ);
if (ret & BF_ST_MEM_ERROR)
goto fail;
}
}
r->sign ^= a->sign;
q->sign = q_sign;
return bf_round(r, prec, flags);
fail:
bf_set_nan(q);
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
int bf_rem(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags, int rnd_mode)
{
bf_t q_s, *q = &q_s;
int ret;
bf_init(r->ctx, q);
ret = bf_divrem(q, r, a, b, prec, flags, rnd_mode);
bf_delete(q);
return ret;
}
static inline int bf_get_limb(slimb_t *pres, const bf_t *a, int flags)
{
#if LIMB_BITS == 32
return bf_get_int32(pres, a, flags);
#else
return bf_get_int64(pres, a, flags);
#endif
}
int bf_remquo(slimb_t *pq, bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags, int rnd_mode)
{
bf_t q_s, *q = &q_s;
int ret;
bf_init(r->ctx, q);
ret = bf_divrem(q, r, a, b, prec, flags, rnd_mode);
bf_get_limb(pq, q, BF_GET_INT_MOD);
bf_delete(q);
return ret;
}
static __maybe_unused inline limb_t mul_mod(limb_t a, limb_t b, limb_t m)
{
dlimb_t t;
t = (dlimb_t)a * (dlimb_t)b;
return t % m;
}
#if defined(USE_MUL_CHECK)
static limb_t mp_mod1(const limb_t *tab, limb_t n, limb_t m, limb_t r)
{
slimb_t i;
dlimb_t t;
for(i = n - 1; i >= 0; i--) {
t = ((dlimb_t)r << LIMB_BITS) | tab[i];
r = t % m;
}
return r;
}
#endif
static const uint16_t sqrt_table[192] = {
128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,
};
/* a >= 2^(LIMB_BITS - 2). Return (s, r) with s=floor(sqrt(a)) and
r=a-s^2. 0 <= r <= 2 * s */
static limb_t mp_sqrtrem1(limb_t *pr, limb_t a)
{
limb_t s1, r1, s, r, q, u, num;
/* use a table for the 16 -> 8 bit sqrt */
s1 = sqrt_table[(a >> (LIMB_BITS - 8)) - 64];
r1 = (a >> (LIMB_BITS - 16)) - s1 * s1;
if (r1 > 2 * s1) {
r1 -= 2 * s1 + 1;
s1++;
}
/* one iteration to get a 32 -> 16 bit sqrt */
num = (r1 << 8) | ((a >> (LIMB_BITS - 32 + 8)) & 0xff);
q = num / (2 * s1); /* q <= 2^8 */
u = num % (2 * s1);
s = (s1 << 8) + q;
r = (u << 8) | ((a >> (LIMB_BITS - 32)) & 0xff);
r -= q * q;
if ((slimb_t)r < 0) {
s--;
r += 2 * s + 1;
}
#if LIMB_BITS == 64
s1 = s;
r1 = r;
/* one more iteration for 64 -> 32 bit sqrt */
num = (r1 << 16) | ((a >> (LIMB_BITS - 64 + 16)) & 0xffff);
q = num / (2 * s1); /* q <= 2^16 */
u = num % (2 * s1);
s = (s1 << 16) + q;
r = (u << 16) | ((a >> (LIMB_BITS - 64)) & 0xffff);
r -= q * q;
if ((slimb_t)r < 0) {
s--;
r += 2 * s + 1;
}
#endif
*pr = r;
return s;
}
/* return floor(sqrt(a)) */
limb_t bf_isqrt(limb_t a)
{
limb_t s, r;
int k;
if (a == 0)
return 0;
k = clz(a) & ~1;
s = mp_sqrtrem1(&r, a << k);
s >>= (k >> 1);
return s;
}
static limb_t mp_sqrtrem2(limb_t *tabs, limb_t *taba)
{
limb_t s1, r1, s, q, u, a0, a1;
dlimb_t r, num;
int l;
a0 = taba[0];
a1 = taba[1];
s1 = mp_sqrtrem1(&r1, a1);
l = LIMB_BITS / 2;
num = ((dlimb_t)r1 << l) | (a0 >> l);
q = num / (2 * s1);
u = num % (2 * s1);
s = (s1 << l) + q;
r = ((dlimb_t)u << l) | (a0 & (((limb_t)1 << l) - 1));
if (unlikely((q >> l) != 0))
r -= (dlimb_t)1 << LIMB_BITS; /* special case when q=2^l */
else
r -= q * q;
if ((slimb_t)(r >> LIMB_BITS) < 0) {
s--;
r += 2 * (dlimb_t)s + 1;
}
tabs[0] = s;
taba[0] = r;
return r >> LIMB_BITS;
}
//#define DEBUG_SQRTREM
/* tmp_buf must contain (n / 2 + 1 limbs). *prh contains the highest
limb of the remainder. */
static int mp_sqrtrem_rec(bf_context_t *s, limb_t *tabs, limb_t *taba, limb_t n,
limb_t *tmp_buf, limb_t *prh)
{
limb_t l, h, rh, ql, qh, c, i;
if (n == 1) {
*prh = mp_sqrtrem2(tabs, taba);
return 0;
}
#ifdef DEBUG_SQRTREM
mp_print_str("a", taba, 2 * n);
#endif
l = n / 2;
h = n - l;
if (mp_sqrtrem_rec(s, tabs + l, taba + 2 * l, h, tmp_buf, &qh))
return -1;
#ifdef DEBUG_SQRTREM
mp_print_str("s1", tabs + l, h);
mp_print_str_h("r1", taba + 2 * l, h, qh);
mp_print_str_h("r2", taba + l, n, qh);
#endif
/* the remainder is in taba + 2 * l. Its high bit is in qh */
if (qh) {
mp_sub(taba + 2 * l, taba + 2 * l, tabs + l, h, 0);
}
/* instead of dividing by 2*s, divide by s (which is normalized)
and update q and r */
if (mp_divnorm(s, tmp_buf, taba + l, n, tabs + l, h))
return -1;
qh += tmp_buf[l];
for(i = 0; i < l; i++)
tabs[i] = tmp_buf[i];
ql = mp_shr(tabs, tabs, l, 1, qh & 1);
qh = qh >> 1; /* 0 or 1 */
if (ql)
rh = mp_add(taba + l, taba + l, tabs + l, h, 0);
else
rh = 0;
#ifdef DEBUG_SQRTREM
mp_print_str_h("q", tabs, l, qh);
mp_print_str_h("u", taba + l, h, rh);
#endif
mp_add_ui(tabs + l, qh, h);
#ifdef DEBUG_SQRTREM
mp_print_str_h("s2", tabs, n, sh);
#endif
/* q = qh, tabs[l - 1 ... 0], r = taba[n - 1 ... l] */
/* subtract q^2. if qh = 1 then q = B^l, so we can take shortcuts */
if (qh) {
c = qh;
} else {
if (mp_mul(s, taba + n, tabs, l, tabs, l))
return -1;
c = mp_sub(taba, taba, taba + n, 2 * l, 0);
}
rh -= mp_sub_ui(taba + 2 * l, c, n - 2 * l);
if ((slimb_t)rh < 0) {
mp_sub_ui(tabs, 1, n);
rh += mp_add_mul1(taba, tabs, n, 2);
rh += mp_add_ui(taba, 1, n);
}
*prh = rh;
return 0;
}
/* 'taba' has 2*n limbs with n >= 1 and taba[2*n-1] >= 2 ^ (LIMB_BITS
- 2). Return (s, r) with s=floor(sqrt(a)) and r=a-s^2. 0 <= r <= 2
* s. tabs has n limbs. r is returned in the lower n limbs of
taba. Its r[n] is the returned value of the function. */
/* Algorithm from the article "Karatsuba Square Root" by Paul Zimmermann and
inspirated from its GMP implementation */
int mp_sqrtrem(bf_context_t *s, limb_t *tabs, limb_t *taba, limb_t n)
{
limb_t tmp_buf1[8];
limb_t *tmp_buf;
mp_size_t n2;
int ret;
n2 = n / 2 + 1;
if (n2 <= countof(tmp_buf1)) {
tmp_buf = tmp_buf1;
} else {
tmp_buf = bf_malloc(s, sizeof(limb_t) * n2);
if (!tmp_buf)
return -1;
}
ret = mp_sqrtrem_rec(s, tabs, taba, n, tmp_buf, taba + n);
if (tmp_buf != tmp_buf1)
bf_free(s, tmp_buf);
return ret;
}
/* Integer square root with remainder. 'a' must be an integer. r =
floor(sqrt(a)) and rem = a - r^2. BF_ST_INEXACT is set if the result
is inexact. 'rem' can be NULL if the remainder is not needed. */
int bf_sqrtrem(bf_t *r, bf_t *rem1, const bf_t *a)
{
int ret;
if (a->len == 0) {
if (a->expn == BF_EXP_NAN) {
bf_set_nan(r);
} else if (a->expn == BF_EXP_INF && a->sign) {
goto invalid_op;
} else {
bf_set(r, a);
}
if (rem1)
bf_set_ui(rem1, 0);
ret = 0;
} else if (a->sign) {
invalid_op:
bf_set_nan(r);
if (rem1)
bf_set_ui(rem1, 0);
ret = BF_ST_INVALID_OP;
} else {
bf_t rem_s, *rem;
bf_sqrt(r, a, (a->expn + 1) / 2, BF_RNDZ);
bf_rint(r, BF_RNDZ);
/* see if the result is exact by computing the remainder */
if (rem1) {
rem = rem1;
} else {
rem = &rem_s;
bf_init(r->ctx, rem);
}
/* XXX: could avoid recomputing the remainder */
bf_mul(rem, r, r, BF_PREC_INF, BF_RNDZ);
bf_neg(rem);
bf_add(rem, rem, a, BF_PREC_INF, BF_RNDZ);
if (bf_is_nan(rem)) {
ret = BF_ST_MEM_ERROR;
goto done;
}
if (rem->len != 0) {
ret = BF_ST_INEXACT;
} else {
ret = 0;
}
done:
if (!rem1)
bf_delete(rem);
}
return ret;
}
int bf_sqrt(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags)
{
bf_context_t *s = a->ctx;
int ret;
assert(r != a);
if (a->len == 0) {
if (a->expn == BF_EXP_NAN) {
bf_set_nan(r);
} else if (a->expn == BF_EXP_INF && a->sign) {
goto invalid_op;
} else {
bf_set(r, a);
}
ret = 0;
} else if (a->sign) {
invalid_op:
bf_set_nan(r);
ret = BF_ST_INVALID_OP;
} else {
limb_t *a1;
slimb_t n, n1;
limb_t res;
/* convert the mantissa to an integer with at least 2 *
prec + 4 bits */
n = (2 * (prec + 2) + 2 * LIMB_BITS - 1) / (2 * LIMB_BITS);
if (bf_resize(r, n))
goto fail;
a1 = bf_malloc(s, sizeof(limb_t) * 2 * n);
if (!a1)
goto fail;
n1 = bf_min(2 * n, a->len);
memset(a1, 0, (2 * n - n1) * sizeof(limb_t));
memcpy(a1 + 2 * n - n1, a->tab + a->len - n1, n1 * sizeof(limb_t));
if (a->expn & 1) {
res = mp_shr(a1, a1, 2 * n, 1, 0);
} else {
res = 0;
}
if (mp_sqrtrem(s, r->tab, a1, n)) {
bf_free(s, a1);
goto fail;
}
if (!res) {
res = mp_scan_nz(a1, n + 1);
}
bf_free(s, a1);
if (!res) {
res = mp_scan_nz(a->tab, a->len - n1);
}
if (res != 0)
r->tab[0] |= 1;
r->sign = 0;
r->expn = (a->expn + 1) >> 1;
ret = bf_round(r, prec, flags);
}
return ret;
fail:
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
static no_inline int bf_op2(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags, bf_op2_func_t *func)
{
bf_t tmp;
int ret;
if (r == a || r == b) {
bf_init(r->ctx, &tmp);
ret = func(&tmp, a, b, prec, flags);
bf_move(r, &tmp);
} else {
ret = func(r, a, b, prec, flags);
}
return ret;
}
int bf_add(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags)
{
return bf_op2(r, a, b, prec, flags, __bf_add);
}
int bf_sub(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags)
{
return bf_op2(r, a, b, prec, flags, __bf_sub);
}
int bf_div(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags)
{
return bf_op2(r, a, b, prec, flags, __bf_div);
}
int bf_mul_ui(bf_t *r, const bf_t *a, uint64_t b1, limb_t prec,
bf_flags_t flags)
{
bf_t b;
int ret;
bf_init(r->ctx, &b);
ret = bf_set_ui(&b, b1);
ret |= bf_mul(r, a, &b, prec, flags);
bf_delete(&b);
return ret;
}
int bf_mul_si(bf_t *r, const bf_t *a, int64_t b1, limb_t prec,
bf_flags_t flags)
{
bf_t b;
int ret;
bf_init(r->ctx, &b);
ret = bf_set_si(&b, b1);
ret |= bf_mul(r, a, &b, prec, flags);
bf_delete(&b);
return ret;
}
int bf_add_si(bf_t *r, const bf_t *a, int64_t b1, limb_t prec,
bf_flags_t flags)
{
bf_t b;
int ret;
bf_init(r->ctx, &b);
ret = bf_set_si(&b, b1);
ret |= bf_add(r, a, &b, prec, flags);
bf_delete(&b);
return ret;
}
static int bf_pow_ui(bf_t *r, const bf_t *a, limb_t b, limb_t prec,
bf_flags_t flags)
{
int ret, n_bits, i;
assert(r != a);
if (b == 0)
return bf_set_ui(r, 1);
ret = bf_set(r, a);
n_bits = LIMB_BITS - clz(b);
for(i = n_bits - 2; i >= 0; i--) {
ret |= bf_mul(r, r, r, prec, flags);
if ((b >> i) & 1)
ret |= bf_mul(r, r, a, prec, flags);
}
return ret;
}
static int bf_pow_ui_ui(bf_t *r, limb_t a1, limb_t b,
limb_t prec, bf_flags_t flags)
{
bf_t a;
int ret;
if (a1 == 10 && b <= LIMB_DIGITS) {
/* use precomputed powers. We do not round at this point
because we expect the caller to do it */
ret = bf_set_ui(r, mp_pow_dec[b]);
} else {
bf_init(r->ctx, &a);
ret = bf_set_ui(&a, a1);
ret |= bf_pow_ui(r, &a, b, prec, flags);
bf_delete(&a);
}
return ret;
}
/* convert to integer (infinite precision) */
int bf_rint(bf_t *r, int rnd_mode)
{
return bf_round(r, 0, rnd_mode | BF_FLAG_RADPNT_PREC);
}
/* logical operations */
#define BF_LOGIC_OR 0
#define BF_LOGIC_XOR 1
#define BF_LOGIC_AND 2
static inline limb_t bf_logic_op1(limb_t a, limb_t b, int op)
{
switch(op) {
case BF_LOGIC_OR:
return a | b;
case BF_LOGIC_XOR:
return a ^ b;
default:
case BF_LOGIC_AND:
return a & b;
}
}
static int bf_logic_op(bf_t *r, const bf_t *a1, const bf_t *b1, int op)
{
bf_t b1_s, a1_s, *a, *b;
limb_t a_sign, b_sign, r_sign;
slimb_t l, i, a_bit_offset, b_bit_offset;
limb_t v1, v2, v1_mask, v2_mask, r_mask;
int ret;
assert(r != a1 && r != b1);
if (a1->expn <= 0)
a_sign = 0; /* minus zero is considered as positive */
else
a_sign = a1->sign;
if (b1->expn <= 0)
b_sign = 0; /* minus zero is considered as positive */
else
b_sign = b1->sign;
if (a_sign) {
a = &a1_s;
bf_init(r->ctx, a);
if (bf_add_si(a, a1, 1, BF_PREC_INF, BF_RNDZ)) {
b = NULL;
goto fail;
}
} else {
a = (bf_t *)a1;
}
if (b_sign) {
b = &b1_s;
bf_init(r->ctx, b);
if (bf_add_si(b, b1, 1, BF_PREC_INF, BF_RNDZ))
goto fail;
} else {
b = (bf_t *)b1;
}
r_sign = bf_logic_op1(a_sign, b_sign, op);
if (op == BF_LOGIC_AND && r_sign == 0) {
/* no need to compute extra zeros for and */
if (a_sign == 0 && b_sign == 0)
l = bf_min(a->expn, b->expn);
else if (a_sign == 0)
l = a->expn;
else
l = b->expn;
} else {
l = bf_max(a->expn, b->expn);
}
/* Note: a or b can be zero */
l = (bf_max(l, 1) + LIMB_BITS - 1) / LIMB_BITS;
if (bf_resize(r, l))
goto fail;
a_bit_offset = a->len * LIMB_BITS - a->expn;
b_bit_offset = b->len * LIMB_BITS - b->expn;
v1_mask = -a_sign;
v2_mask = -b_sign;
r_mask = -r_sign;
for(i = 0; i < l; i++) {
v1 = get_bits(a->tab, a->len, a_bit_offset + i * LIMB_BITS) ^ v1_mask;
v2 = get_bits(b->tab, b->len, b_bit_offset + i * LIMB_BITS) ^ v2_mask;
r->tab[i] = bf_logic_op1(v1, v2, op) ^ r_mask;
}
r->expn = l * LIMB_BITS;
r->sign = r_sign;
bf_normalize_and_round(r, BF_PREC_INF, BF_RNDZ); /* cannot fail */
if (r_sign) {
if (bf_add_si(r, r, -1, BF_PREC_INF, BF_RNDZ))
goto fail;
}
ret = 0;
done:
if (a == &a1_s)
bf_delete(a);
if (b == &b1_s)
bf_delete(b);
return ret;
fail:
bf_set_nan(r);
ret = BF_ST_MEM_ERROR;
goto done;
}
/* 'a' and 'b' must be integers. Return 0 or BF_ST_MEM_ERROR. */
int bf_logic_or(bf_t *r, const bf_t *a, const bf_t *b)
{
return bf_logic_op(r, a, b, BF_LOGIC_OR);
}
/* 'a' and 'b' must be integers. Return 0 or BF_ST_MEM_ERROR. */
int bf_logic_xor(bf_t *r, const bf_t *a, const bf_t *b)
{
return bf_logic_op(r, a, b, BF_LOGIC_XOR);
}
/* 'a' and 'b' must be integers. Return 0 or BF_ST_MEM_ERROR. */
int bf_logic_and(bf_t *r, const bf_t *a, const bf_t *b)
{
return bf_logic_op(r, a, b, BF_LOGIC_AND);
}
/* conversion between fixed size types */
typedef union {
double d;
uint64_t u;
} Float64Union;
int bf_get_float64(const bf_t *a, double *pres, bf_rnd_t rnd_mode)
{
Float64Union u;
int e, ret;
uint64_t m;
ret = 0;
if (a->expn == BF_EXP_NAN) {
u.u = 0x7ff8000000000000; /* quiet nan */
} else {
bf_t b_s, *b = &b_s;
bf_init(a->ctx, b);
bf_set(b, a);
if (bf_is_finite(b)) {
ret = bf_round(b, 53, rnd_mode | BF_FLAG_SUBNORMAL | bf_set_exp_bits(11));
}
if (b->expn == BF_EXP_INF) {
e = (1 << 11) - 1;
m = 0;
} else if (b->expn == BF_EXP_ZERO) {
e = 0;
m = 0;
} else {
e = b->expn + 1023 - 1;
#if LIMB_BITS == 32
if (b->len == 2) {
m = ((uint64_t)b->tab[1] << 32) | b->tab[0];
} else {
m = ((uint64_t)b->tab[0] << 32);
}
#else
m = b->tab[0];
#endif
if (e <= 0) {
/* subnormal */
m = m >> (12 - e);
e = 0;
} else {
m = (m << 1) >> 12;
}
}
u.u = m | ((uint64_t)e << 52) | ((uint64_t)b->sign << 63);
bf_delete(b);
}
*pres = u.d;
return ret;
}
int bf_set_float64(bf_t *a, double d)
{
Float64Union u;
uint64_t m;
int shift, e, sgn;
u.d = d;
sgn = u.u >> 63;
e = (u.u >> 52) & ((1 << 11) - 1);
m = u.u & (((uint64_t)1 << 52) - 1);
if (e == ((1 << 11) - 1)) {
if (m != 0) {
bf_set_nan(a);
} else {
bf_set_inf(a, sgn);
}
} else if (e == 0) {
if (m == 0) {
bf_set_zero(a, sgn);
} else {
/* subnormal number */
m <<= 12;
shift = clz64(m);
m <<= shift;
e = -shift;
goto norm;
}
} else {
m = (m << 11) | ((uint64_t)1 << 63);
norm:
a->expn = e - 1023 + 1;
#if LIMB_BITS == 32
if (bf_resize(a, 2))
goto fail;
a->tab[0] = m;
a->tab[1] = m >> 32;
#else
if (bf_resize(a, 1))
goto fail;
a->tab[0] = m;
#endif
a->sign = sgn;
}
return 0;
fail:
bf_set_nan(a);
return BF_ST_MEM_ERROR;
}
/* The rounding mode is always BF_RNDZ. Return BF_ST_INVALID_OP if there
is an overflow and 0 otherwise. */
int bf_get_int32(int *pres, const bf_t *a, int flags)
{
uint32_t v;
int ret;
if (a->expn >= BF_EXP_INF) {
ret = BF_ST_INVALID_OP;
if (flags & BF_GET_INT_MOD) {
v = 0;
} else if (a->expn == BF_EXP_INF) {
v = (uint32_t)INT32_MAX + a->sign;
} else {
v = INT32_MAX;
}
} else if (a->expn <= 0) {
v = 0;
ret = 0;
} else if (a->expn <= 31) {
v = a->tab[a->len - 1] >> (LIMB_BITS - a->expn);
if (a->sign)
v = -v;
ret = 0;
} else if (!(flags & BF_GET_INT_MOD)) {
ret = BF_ST_INVALID_OP;
if (a->sign) {
v = (uint32_t)INT32_MAX + 1;
if (a->expn == 32 &&
(a->tab[a->len - 1] >> (LIMB_BITS - 32)) == v) {
ret = 0;
}
} else {
v = INT32_MAX;
}
} else {
v = get_bits(a->tab, a->len, a->len * LIMB_BITS - a->expn);
if (a->sign)
v = -v;
ret = 0;
}
*pres = v;
return ret;
}
/* The rounding mode is always BF_RNDZ. Return BF_ST_INVALID_OP if there
is an overflow and 0 otherwise. */
int bf_get_int64(int64_t *pres, const bf_t *a, int flags)
{
uint64_t v;
int ret;
if (a->expn >= BF_EXP_INF) {
ret = BF_ST_INVALID_OP;
if (flags & BF_GET_INT_MOD) {
v = 0;
} else if (a->expn == BF_EXP_INF) {
v = (uint64_t)INT64_MAX + a->sign;
} else {
v = INT64_MAX;
}
} else if (a->expn <= 0) {
v = 0;
ret = 0;
} else if (a->expn <= 63) {
#if LIMB_BITS == 32
if (a->expn <= 32)
v = a->tab[a->len - 1] >> (LIMB_BITS - a->expn);
else
v = (((uint64_t)a->tab[a->len - 1] << 32) |