quickjs/libbf.c

8467 lines
235 KiB
C
Raw Permalink Normal View History

2020-09-06 16:53:08 +00:00
/*
* Tiny arbitrary precision floating point library
*
2021-03-27 10:17:31 +00:00
* Copyright (c) 2017-2021 Fabrice Bellard
2020-09-06 16:53:08 +00:00
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
#include <assert.h>
#ifdef __AVX2__
#include <immintrin.h>
#endif
#include "cutils.h"
#include "libbf.h"
/* enable it to check the multiplication result */
//#define USE_MUL_CHECK
/* enable it to use FFT/NTT multiplication */
#define USE_FFT_MUL
/* enable decimal floating point support */
#define USE_BF_DEC
//#define inline __attribute__((always_inline))
#ifdef __AVX2__
#define FFT_MUL_THRESHOLD 100 /* in limbs of the smallest factor */
#else
#define FFT_MUL_THRESHOLD 100 /* in limbs of the smallest factor */
#endif
/* XXX: adjust */
#define DIVNORM_LARGE_THRESHOLD 50
#define UDIV1NORM_THRESHOLD 3
#if LIMB_BITS == 64
#define FMT_LIMB1 "%" PRIx64
#define FMT_LIMB "%016" PRIx64
#define PRId_LIMB PRId64
#define PRIu_LIMB PRIu64
#else
#define FMT_LIMB1 "%x"
#define FMT_LIMB "%08x"
#define PRId_LIMB "d"
#define PRIu_LIMB "u"
#endif
typedef intptr_t mp_size_t;
typedef int bf_op2_func_t(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags);
#ifdef USE_FFT_MUL
#define FFT_MUL_R_OVERLAP_A (1 << 0)
#define FFT_MUL_R_OVERLAP_B (1 << 1)
#define FFT_MUL_R_NORESIZE (1 << 2)
static no_inline int fft_mul(bf_context_t *s,
bf_t *res, limb_t *a_tab, limb_t a_len,
limb_t *b_tab, limb_t b_len, int mul_flags);
static void fft_clear_cache(bf_context_t *s);
#endif
#ifdef USE_BF_DEC
static limb_t get_digit(const limb_t *tab, limb_t len, slimb_t pos);
#endif
/* could leading zeros */
static inline int clz(limb_t a)
{
if (a == 0) {
return LIMB_BITS;
} else {
#if LIMB_BITS == 64
return clz64(a);
#else
return clz32(a);
#endif
}
}
static inline int ctz(limb_t a)
{
if (a == 0) {
return LIMB_BITS;
} else {
#if LIMB_BITS == 64
return ctz64(a);
#else
return ctz32(a);
#endif
}
}
static inline int ceil_log2(limb_t a)
{
if (a <= 1)
return 0;
else
return LIMB_BITS - clz(a - 1);
}
/* b must be >= 1 */
static inline slimb_t ceil_div(slimb_t a, slimb_t b)
{
if (a >= 0)
return (a + b - 1) / b;
else
return a / b;
}
/* b must be >= 1 */
static inline slimb_t floor_div(slimb_t a, slimb_t b)
{
if (a >= 0) {
return a / b;
} else {
return (a - b + 1) / b;
}
}
/* return r = a modulo b (0 <= r <= b - 1. b must be >= 1 */
static inline limb_t smod(slimb_t a, slimb_t b)
{
a = a % (slimb_t)b;
if (a < 0)
a += b;
return a;
}
2020-09-06 16:57:11 +00:00
/* signed addition with saturation */
static inline slimb_t sat_add(slimb_t a, slimb_t b)
{
slimb_t r;
r = a + b;
/* overflow ? */
if (((a ^ r) & (b ^ r)) < 0)
r = (a >> (LIMB_BITS - 1)) ^ (((limb_t)1 << (LIMB_BITS - 1)) - 1);
return r;
}
2020-09-06 16:53:08 +00:00
#define malloc(s) malloc_is_forbidden(s)
#define free(p) free_is_forbidden(p)
#define realloc(p, s) realloc_is_forbidden(p, s)
void bf_context_init(bf_context_t *s, bf_realloc_func_t *realloc_func,
void *realloc_opaque)
{
memset(s, 0, sizeof(*s));
s->realloc_func = realloc_func;
s->realloc_opaque = realloc_opaque;
}
void bf_context_end(bf_context_t *s)
{
bf_clear_cache(s);
}
void bf_init(bf_context_t *s, bf_t *r)
{
r->ctx = s;
r->sign = 0;
r->expn = BF_EXP_ZERO;
r->len = 0;
r->tab = NULL;
}
/* return 0 if OK, -1 if alloc error */
int bf_resize(bf_t *r, limb_t len)
{
limb_t *tab;
if (len != r->len) {
tab = bf_realloc(r->ctx, r->tab, len * sizeof(limb_t));
if (!tab && len != 0)
return -1;
r->tab = tab;
r->len = len;
}
return 0;
}
/* return 0 or BF_ST_MEM_ERROR */
int bf_set_ui(bf_t *r, uint64_t a)
{
r->sign = 0;
if (a == 0) {
r->expn = BF_EXP_ZERO;
bf_resize(r, 0); /* cannot fail */
}
#if LIMB_BITS == 32
else if (a <= 0xffffffff)
#else
else
#endif
{
int shift;
if (bf_resize(r, 1))
goto fail;
shift = clz(a);
r->tab[0] = a << shift;
r->expn = LIMB_BITS - shift;
}
#if LIMB_BITS == 32
else {
uint32_t a1, a0;
int shift;
if (bf_resize(r, 2))
goto fail;
a0 = a;
a1 = a >> 32;
shift = clz(a1);
r->tab[0] = a0 << shift;
r->tab[1] = (a1 << shift) | (a0 >> (LIMB_BITS - shift));
r->expn = 2 * LIMB_BITS - shift;
}
#endif
return 0;
fail:
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
/* return 0 or BF_ST_MEM_ERROR */
int bf_set_si(bf_t *r, int64_t a)
{
int ret;
if (a < 0) {
ret = bf_set_ui(r, -a);
r->sign = 1;
} else {
ret = bf_set_ui(r, a);
}
return ret;
}
void bf_set_nan(bf_t *r)
{
bf_resize(r, 0); /* cannot fail */
r->expn = BF_EXP_NAN;
r->sign = 0;
}
void bf_set_zero(bf_t *r, int is_neg)
{
bf_resize(r, 0); /* cannot fail */
r->expn = BF_EXP_ZERO;
r->sign = is_neg;
}
void bf_set_inf(bf_t *r, int is_neg)
{
bf_resize(r, 0); /* cannot fail */
r->expn = BF_EXP_INF;
r->sign = is_neg;
}
/* return 0 or BF_ST_MEM_ERROR */
int bf_set(bf_t *r, const bf_t *a)
{
if (r == a)
return 0;
if (bf_resize(r, a->len)) {
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
r->sign = a->sign;
r->expn = a->expn;
memcpy(r->tab, a->tab, a->len * sizeof(limb_t));
return 0;
}
/* equivalent to bf_set(r, a); bf_delete(a) */
void bf_move(bf_t *r, bf_t *a)
{
bf_context_t *s = r->ctx;
if (r == a)
return;
bf_free(s, r->tab);
*r = *a;
}
static limb_t get_limbz(const bf_t *a, limb_t idx)
{
if (idx >= a->len)
return 0;
else
return a->tab[idx];
}
/* get LIMB_BITS at bit position 'pos' in tab */
static inline limb_t get_bits(const limb_t *tab, limb_t len, slimb_t pos)
{
limb_t i, a0, a1;
int p;
i = pos >> LIMB_LOG2_BITS;
p = pos & (LIMB_BITS - 1);
if (i < len)
a0 = tab[i];
else
a0 = 0;
if (p == 0) {
return a0;
} else {
i++;
if (i < len)
a1 = tab[i];
else
a1 = 0;
return (a0 >> p) | (a1 << (LIMB_BITS - p));
}
}
static inline limb_t get_bit(const limb_t *tab, limb_t len, slimb_t pos)
{
slimb_t i;
i = pos >> LIMB_LOG2_BITS;
if (i < 0 || i >= len)
return 0;
return (tab[i] >> (pos & (LIMB_BITS - 1))) & 1;
}
static inline limb_t limb_mask(int start, int last)
{
limb_t v;
int n;
n = last - start + 1;
if (n == LIMB_BITS)
v = -1;
else
v = (((limb_t)1 << n) - 1) << start;
return v;
}
static limb_t mp_scan_nz(const limb_t *tab, mp_size_t n)
{
mp_size_t i;
for(i = 0; i < n; i++) {
if (tab[i] != 0)
return 1;
}
return 0;
}
/* return != 0 if one bit between 0 and bit_pos inclusive is not zero. */
static inline limb_t scan_bit_nz(const bf_t *r, slimb_t bit_pos)
{
slimb_t pos;
limb_t v;
pos = bit_pos >> LIMB_LOG2_BITS;
if (pos < 0)
return 0;
v = r->tab[pos] & limb_mask(0, bit_pos & (LIMB_BITS - 1));
if (v != 0)
return 1;
pos--;
while (pos >= 0) {
if (r->tab[pos] != 0)
return 1;
pos--;
}
return 0;
}
/* return the addend for rounding. Note that prec can be <= 0 (for
BF_FLAG_RADPNT_PREC) */
static int bf_get_rnd_add(int *pret, const bf_t *r, limb_t l,
slimb_t prec, int rnd_mode)
{
int add_one, inexact;
limb_t bit1, bit0;
if (rnd_mode == BF_RNDF) {
bit0 = 1; /* faithful rounding does not honor the INEXACT flag */
} else {
/* starting limb for bit 'prec + 1' */
bit0 = scan_bit_nz(r, l * LIMB_BITS - 1 - bf_max(0, prec + 1));
}
/* get the bit at 'prec' */
bit1 = get_bit(r->tab, l, l * LIMB_BITS - 1 - prec);
inexact = (bit1 | bit0) != 0;
add_one = 0;
switch(rnd_mode) {
case BF_RNDZ:
break;
case BF_RNDN:
if (bit1) {
if (bit0) {
add_one = 1;
} else {
/* round to even */
add_one =
get_bit(r->tab, l, l * LIMB_BITS - 1 - (prec - 1));
}
}
break;
case BF_RNDD:
case BF_RNDU:
if (r->sign == (rnd_mode == BF_RNDD))
add_one = inexact;
break;
2020-09-06 16:57:11 +00:00
case BF_RNDA:
add_one = inexact;
break;
2020-09-06 16:53:08 +00:00
case BF_RNDNA:
case BF_RNDF:
add_one = bit1;
break;
default:
abort();
}
if (inexact)
*pret |= BF_ST_INEXACT;
return add_one;
}
static int bf_set_overflow(bf_t *r, int sign, limb_t prec, bf_flags_t flags)
{
slimb_t i, l, e_max;
int rnd_mode;
rnd_mode = flags & BF_RND_MASK;
if (prec == BF_PREC_INF ||
rnd_mode == BF_RNDN ||
rnd_mode == BF_RNDNA ||
2020-09-06 16:57:11 +00:00
rnd_mode == BF_RNDA ||
2020-09-06 16:53:08 +00:00
(rnd_mode == BF_RNDD && sign == 1) ||
(rnd_mode == BF_RNDU && sign == 0)) {
bf_set_inf(r, sign);
} else {
/* set to maximum finite number */
l = (prec + LIMB_BITS - 1) / LIMB_BITS;
if (bf_resize(r, l)) {
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
r->tab[0] = limb_mask((-prec) & (LIMB_BITS - 1),
LIMB_BITS - 1);
for(i = 1; i < l; i++)
r->tab[i] = (limb_t)-1;
e_max = (limb_t)1 << (bf_get_exp_bits(flags) - 1);
r->expn = e_max;
r->sign = sign;
}
return BF_ST_OVERFLOW | BF_ST_INEXACT;
}
/* round to prec1 bits assuming 'r' is non zero and finite. 'r' is
assumed to have length 'l' (1 <= l <= r->len). Note: 'prec1' can be
2020-09-06 16:57:11 +00:00
infinite (BF_PREC_INF). 'ret' is 0 or BF_ST_INEXACT if the result
is known to be inexact. Can fail with BF_ST_MEM_ERROR in case of
2020-09-06 16:53:08 +00:00
overflow not returning infinity. */
2020-09-06 16:57:11 +00:00
static int __bf_round(bf_t *r, limb_t prec1, bf_flags_t flags, limb_t l,
int ret)
2020-09-06 16:53:08 +00:00
{
limb_t v, a;
2020-09-06 16:57:11 +00:00
int shift, add_one, rnd_mode;
2020-09-06 16:53:08 +00:00
slimb_t i, bit_pos, pos, e_min, e_max, e_range, prec;
/* e_min and e_max are computed to match the IEEE 754 conventions */
e_range = (limb_t)1 << (bf_get_exp_bits(flags) - 1);
e_min = -e_range + 3;
e_max = e_range;
if (flags & BF_FLAG_RADPNT_PREC) {
/* 'prec' is the precision after the radix point */
if (prec1 != BF_PREC_INF)
prec = r->expn + prec1;
else
prec = prec1;
} else if (unlikely(r->expn < e_min) && (flags & BF_FLAG_SUBNORMAL)) {
/* restrict the precision in case of potentially subnormal
result */
assert(prec1 != BF_PREC_INF);
prec = prec1 - (e_min - r->expn);
} else {
prec = prec1;
}
/* round to prec bits */
rnd_mode = flags & BF_RND_MASK;
add_one = bf_get_rnd_add(&ret, r, l, prec, rnd_mode);
if (prec <= 0) {
if (add_one) {
bf_resize(r, 1); /* cannot fail */
r->tab[0] = (limb_t)1 << (LIMB_BITS - 1);
r->expn += 1 - prec;
ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT;
return ret;
} else {
goto underflow;
}
} else if (add_one) {
limb_t carry;
/* add one starting at digit 'prec - 1' */
bit_pos = l * LIMB_BITS - 1 - (prec - 1);
pos = bit_pos >> LIMB_LOG2_BITS;
carry = (limb_t)1 << (bit_pos & (LIMB_BITS - 1));
for(i = pos; i < l; i++) {
v = r->tab[i] + carry;
carry = (v < carry);
r->tab[i] = v;
if (carry == 0)
break;
}
if (carry) {
/* shift right by one digit */
v = 1;
for(i = l - 1; i >= pos; i--) {
a = r->tab[i];
r->tab[i] = (a >> 1) | (v << (LIMB_BITS - 1));
v = a;
}
r->expn++;
}
}
/* check underflow */
if (unlikely(r->expn < e_min)) {
if (flags & BF_FLAG_SUBNORMAL) {
/* if inexact, also set the underflow flag */
if (ret & BF_ST_INEXACT)
ret |= BF_ST_UNDERFLOW;
} else {
underflow:
ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT;
bf_set_zero(r, r->sign);
return ret;
}
}
/* check overflow */
if (unlikely(r->expn > e_max))
return bf_set_overflow(r, r->sign, prec1, flags);
/* keep the bits starting at 'prec - 1' */
bit_pos = l * LIMB_BITS - 1 - (prec - 1);
i = bit_pos >> LIMB_LOG2_BITS;
if (i >= 0) {
shift = bit_pos & (LIMB_BITS - 1);
if (shift != 0)
r->tab[i] &= limb_mask(shift, LIMB_BITS - 1);
} else {
i = 0;
}
/* remove trailing zeros */
while (r->tab[i] == 0)
i++;
if (i > 0) {
l -= i;
memmove(r->tab, r->tab + i, l * sizeof(limb_t));
}
bf_resize(r, l); /* cannot fail */
return ret;
}
/* 'r' must be a finite number. */
int bf_normalize_and_round(bf_t *r, limb_t prec1, bf_flags_t flags)
{
limb_t l, v, a;
int shift, ret;
slimb_t i;
// bf_print_str("bf_renorm", r);
l = r->len;
while (l > 0 && r->tab[l - 1] == 0)
l--;
if (l == 0) {
/* zero */
r->expn = BF_EXP_ZERO;
bf_resize(r, 0); /* cannot fail */
ret = 0;
} else {
r->expn -= (r->len - l) * LIMB_BITS;
/* shift to have the MSB set to '1' */
v = r->tab[l - 1];
shift = clz(v);
if (shift != 0) {
v = 0;
for(i = 0; i < l; i++) {
a = r->tab[i];
r->tab[i] = (a << shift) | (v >> (LIMB_BITS - shift));
v = a;
}
r->expn -= shift;
}
2020-09-06 16:57:11 +00:00
ret = __bf_round(r, prec1, flags, l, 0);
2020-09-06 16:53:08 +00:00
}
// bf_print_str("r_final", r);
return ret;
}
/* return true if rounding can be done at precision 'prec' assuming
the exact result r is such that |r-a| <= 2^(EXP(a)-k). */
/* XXX: check the case where the exponent would be incremented by the
rounding */
int bf_can_round(const bf_t *a, slimb_t prec, bf_rnd_t rnd_mode, slimb_t k)
{
BOOL is_rndn;
slimb_t bit_pos, n;
limb_t bit;
if (a->expn == BF_EXP_INF || a->expn == BF_EXP_NAN)
return FALSE;
if (rnd_mode == BF_RNDF) {
return (k >= (prec + 1));
}
if (a->expn == BF_EXP_ZERO)
return FALSE;
2020-09-06 16:57:11 +00:00
is_rndn = (rnd_mode == BF_RNDN || rnd_mode == BF_RNDNA);
2020-09-06 16:53:08 +00:00
if (k < (prec + 2))
return FALSE;
bit_pos = a->len * LIMB_BITS - 1 - prec;
n = k - prec;
/* bit pattern for RNDN or RNDNA: 0111.. or 1000...
for other rounding modes: 000... or 111...
*/
bit = get_bit(a->tab, a->len, bit_pos);
bit_pos--;
n--;
bit ^= is_rndn;
/* XXX: slow, but a few iterations on average */
while (n != 0) {
if (get_bit(a->tab, a->len, bit_pos) != bit)
return TRUE;
bit_pos--;
n--;
}
return FALSE;
}
/* Cannot fail with BF_ST_MEM_ERROR. */
int bf_round(bf_t *r, limb_t prec, bf_flags_t flags)
{
if (r->len == 0)
return 0;
2020-09-06 16:57:11 +00:00
return __bf_round(r, prec, flags, r->len, 0);
2020-09-06 16:53:08 +00:00
}
/* for debugging */
static __maybe_unused void dump_limbs(const char *str, const limb_t *tab, limb_t n)
{
limb_t i;
printf("%s: len=%" PRId_LIMB "\n", str, n);
for(i = 0; i < n; i++) {
printf("%" PRId_LIMB ": " FMT_LIMB "\n",
i, tab[i]);
}
}
void mp_print_str(const char *str, const limb_t *tab, limb_t n)
{
slimb_t i;
printf("%s= 0x", str);
for(i = n - 1; i >= 0; i--) {
if (i != (n - 1))
printf("_");
printf(FMT_LIMB, tab[i]);
}
printf("\n");
}
static __maybe_unused void mp_print_str_h(const char *str,
const limb_t *tab, limb_t n,
limb_t high)
{
slimb_t i;
printf("%s= 0x", str);
printf(FMT_LIMB, high);
for(i = n - 1; i >= 0; i--) {
printf("_");
printf(FMT_LIMB, tab[i]);
}
printf("\n");
}
/* for debugging */
void bf_print_str(const char *str, const bf_t *a)
{
slimb_t i;
printf("%s=", str);
if (a->expn == BF_EXP_NAN) {
printf("NaN");
} else {
if (a->sign)
putchar('-');
if (a->expn == BF_EXP_ZERO) {
putchar('0');
} else if (a->expn == BF_EXP_INF) {
printf("Inf");
} else {
printf("0x0.");
for(i = a->len - 1; i >= 0; i--)
printf(FMT_LIMB, a->tab[i]);
printf("p%" PRId_LIMB, a->expn);
}
}
printf("\n");
}
/* compare the absolute value of 'a' and 'b'. Return < 0 if a < b, 0
if a = b and > 0 otherwise. */
int bf_cmpu(const bf_t *a, const bf_t *b)
{
slimb_t i;
limb_t len, v1, v2;
if (a->expn != b->expn) {
if (a->expn < b->expn)
return -1;
else
return 1;
}
len = bf_max(a->len, b->len);
for(i = len - 1; i >= 0; i--) {
v1 = get_limbz(a, a->len - len + i);
v2 = get_limbz(b, b->len - len + i);
if (v1 != v2) {
if (v1 < v2)
return -1;
else
return 1;
}
}
return 0;
}
/* Full order: -0 < 0, NaN == NaN and NaN is larger than all other numbers */
int bf_cmp_full(const bf_t *a, const bf_t *b)
{
int res;
if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) {
if (a->expn == b->expn)
res = 0;
else if (a->expn == BF_EXP_NAN)
res = 1;
else
res = -1;
} else if (a->sign != b->sign) {
res = 1 - 2 * a->sign;
} else {
res = bf_cmpu(a, b);
if (a->sign)
res = -res;
}
return res;
}
2020-09-06 16:57:11 +00:00
/* Standard floating point comparison: return 2 if one of the operands
is NaN (unordered) or -1, 0, 1 depending on the ordering assuming
-0 == +0 */
int bf_cmp(const bf_t *a, const bf_t *b)
2020-09-06 16:53:08 +00:00
{
int res;
2020-09-06 16:57:11 +00:00
if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) {
res = 2;
} else if (a->sign != b->sign) {
if (a->expn == BF_EXP_ZERO && b->expn == BF_EXP_ZERO)
res = 0;
else
res = 1 - 2 * a->sign;
2020-09-06 16:53:08 +00:00
} else {
res = bf_cmpu(a, b);
2020-09-06 16:57:11 +00:00
if (a->sign)
res = -res;
2020-09-06 16:53:08 +00:00
}
2020-09-06 16:57:11 +00:00
return res;
2020-09-06 16:53:08 +00:00
}
/* Compute the number of bits 'n' matching the pattern:
a= X1000..0
b= X0111..1
When computing a-b, the result will have at least n leading zero
bits.
Precondition: a > b and a.expn - b.expn = 0 or 1
*/
static limb_t count_cancelled_bits(const bf_t *a, const bf_t *b)
{
slimb_t bit_offset, b_offset, n;
int p, p1;
limb_t v1, v2, mask;
bit_offset = a->len * LIMB_BITS - 1;
b_offset = (b->len - a->len) * LIMB_BITS - (LIMB_BITS - 1) +
a->expn - b->expn;
n = 0;
/* first search the equals bits */
for(;;) {
v1 = get_limbz(a, bit_offset >> LIMB_LOG2_BITS);
v2 = get_bits(b->tab, b->len, bit_offset + b_offset);
// printf("v1=" FMT_LIMB " v2=" FMT_LIMB "\n", v1, v2);
if (v1 != v2)
break;
n += LIMB_BITS;
bit_offset -= LIMB_BITS;
}
/* find the position of the first different bit */
p = clz(v1 ^ v2) + 1;
n += p;
/* then search for '0' in a and '1' in b */
p = LIMB_BITS - p;
if (p > 0) {
/* search in the trailing p bits of v1 and v2 */
mask = limb_mask(0, p - 1);
p1 = bf_min(clz(v1 & mask), clz((~v2) & mask)) - (LIMB_BITS - p);
n += p1;
if (p1 != p)
goto done;
}
bit_offset -= LIMB_BITS;
for(;;) {
v1 = get_limbz(a, bit_offset >> LIMB_LOG2_BITS);
v2 = get_bits(b->tab, b->len, bit_offset + b_offset);
// printf("v1=" FMT_LIMB " v2=" FMT_LIMB "\n", v1, v2);
if (v1 != 0 || v2 != -1) {
/* different: count the matching bits */
p1 = bf_min(clz(v1), clz(~v2));
n += p1;
break;
}
n += LIMB_BITS;
bit_offset -= LIMB_BITS;
}
done:
return n;
}
static int bf_add_internal(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags, int b_neg)
{
const bf_t *tmp;
int is_sub, ret, cmp_res, a_sign, b_sign;
a_sign = a->sign;
b_sign = b->sign ^ b_neg;
is_sub = a_sign ^ b_sign;
cmp_res = bf_cmpu(a, b);
if (cmp_res < 0) {
tmp = a;
a = b;
b = tmp;
a_sign = b_sign; /* b_sign is never used later */
}
/* abs(a) >= abs(b) */
if (cmp_res == 0 && is_sub && a->expn < BF_EXP_INF) {
/* zero result */
bf_set_zero(r, (flags & BF_RND_MASK) == BF_RNDD);
ret = 0;
} else if (a->len == 0 || b->len == 0) {
ret = 0;
if (a->expn >= BF_EXP_INF) {
if (a->expn == BF_EXP_NAN) {
/* at least one operand is NaN */
bf_set_nan(r);
} else if (b->expn == BF_EXP_INF && is_sub) {
/* infinities with different signs */
bf_set_nan(r);
ret = BF_ST_INVALID_OP;
} else {
bf_set_inf(r, a_sign);
}
} else {
/* at least one zero and not subtract */
bf_set(r, a);
r->sign = a_sign;
goto renorm;
}
} else {
slimb_t d, a_offset, b_bit_offset, i, cancelled_bits;
limb_t carry, v1, v2, u, r_len, carry1, precl, tot_len, z, sub_mask;
r->sign = a_sign;
r->expn = a->expn;
d = a->expn - b->expn;
/* must add more precision for the leading cancelled bits in
subtraction */
if (is_sub) {
if (d <= 1)
cancelled_bits = count_cancelled_bits(a, b);
else
cancelled_bits = 1;
} else {
cancelled_bits = 0;
}
/* add two extra bits for rounding */
precl = (cancelled_bits + prec + 2 + LIMB_BITS - 1) / LIMB_BITS;
tot_len = bf_max(a->len, b->len + (d + LIMB_BITS - 1) / LIMB_BITS);
r_len = bf_min(precl, tot_len);
if (bf_resize(r, r_len))
goto fail;
a_offset = a->len - r_len;
b_bit_offset = (b->len - r_len) * LIMB_BITS + d;
/* compute the bits before for the rounding */
carry = is_sub;
z = 0;
sub_mask = -is_sub;
i = r_len - tot_len;
while (i < 0) {
slimb_t ap, bp;
BOOL inflag;
ap = a_offset + i;
bp = b_bit_offset + i * LIMB_BITS;
inflag = FALSE;
if (ap >= 0 && ap < a->len) {
v1 = a->tab[ap];
inflag = TRUE;
} else {
v1 = 0;
}
if (bp + LIMB_BITS > 0 && bp < (slimb_t)(b->len * LIMB_BITS)) {
v2 = get_bits(b->tab, b->len, bp);
inflag = TRUE;
} else {
v2 = 0;
}
if (!inflag) {
/* outside 'a' and 'b': go directly to the next value
inside a or b so that the running time does not
depend on the exponent difference */
i = 0;
if (ap < 0)
i = bf_min(i, -a_offset);
/* b_bit_offset + i * LIMB_BITS + LIMB_BITS >= 1
equivalent to
i >= ceil(-b_bit_offset + 1 - LIMB_BITS) / LIMB_BITS)
*/
if (bp + LIMB_BITS <= 0)
i = bf_min(i, (-b_bit_offset) >> LIMB_LOG2_BITS);
} else {
i++;
}
v2 ^= sub_mask;
u = v1 + v2;
carry1 = u < v1;
u += carry;
carry = (u < carry) | carry1;
z |= u;
}
/* and the result */
for(i = 0; i < r_len; i++) {
v1 = get_limbz(a, a_offset + i);
v2 = get_bits(b->tab, b->len, b_bit_offset + i * LIMB_BITS);
v2 ^= sub_mask;
u = v1 + v2;
carry1 = u < v1;
u += carry;
carry = (u < carry) | carry1;
r->tab[i] = u;
}
/* set the extra bits for the rounding */
r->tab[0] |= (z != 0);
/* carry is only possible in add case */
if (!is_sub && carry) {
if (bf_resize(r, r_len + 1))
goto fail;
r->tab[r_len] = 1;
r->expn += LIMB_BITS;
}
renorm:
ret = bf_normalize_and_round(r, prec, flags);
}
return ret;
fail:
bf_set_nan(r);
return BF_ST_MEM_ERROR;
}
static int __bf_add(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags)
{
return bf_add_internal(r, a, b, prec, flags, 0);
}
static int __bf_sub(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
bf_flags_t flags)
{
return bf_add_internal(r, a, b, prec, flags, 1);
}
limb_t mp_add(limb_t *res, const limb_t *op1, const limb_t *op2,
limb_t n, limb_t carry)
{
slimb_t i;
limb_t k, a, v, k1;
k = carry;
for(i=0;i<n;i++) {
v = op1[i];
a = v + op2[i];
k1 = a < v;
a = a + k;
k = (a < k) | k1;
res[i] = a;
}
return k;
}
limb_t mp_add_ui(limb_t *tab, limb_t b, size_t n)
{
size_t i;
limb_t k, a;
k=b;
for(i=0;i<n;i++) {
if (k == 0)
break;
a = tab[i] + k;
k = (a < k);
tab[i] = a;
}
return k;
}
limb_t mp_sub(limb_t *res, const limb_t *op1, const limb_t *op2,
mp_size_t n, limb_t carry)
{
int i;
limb_t k, a, v, k1;
k = carry;
for(i=0;i<n;i++) {
v = op1[i];
a = v - op2[i];
k1 = a > v;
v = a - k;
k = (v > a) | k1;
res[i] = v;
}
return k;
}
/* compute 0 - op2 */
static limb_t mp_neg(limb_t *res, const limb_t *op2, mp_size_t n, limb_t carry)
{
int i;
limb_t k, a, v, k1;
k = carry;
for(i=0;i<n;i++) {
v = 0;
a = v - op2[i];
k1 = a > v;
v = a - k;
k = (v > a) | k1;
res[i] = v;
}
return k;
}
limb_t mp_sub_ui(limb_t *tab, limb_t b, mp_size_t n)
{
mp_size_t i;
limb_t k, a, v;
k=b;
for(i=0;i<n;i++) {
v = tab[i];
a = v - k;
k = a > v;
tab[i] = a;
if (k == 0)
break;
}
return k;
}
/* r = (a + high*B^n) >> shift. Return the remainder r (0 <= r < 2^shift).
1 <= shift <= LIMB_BITS - 1 */
static limb_t mp_shr(limb_t *tab_r, const limb_t *tab, mp_size_t n,
int shift, limb_t high)
{
mp_size_t i;
limb_t l, a;
assert(shift >= 1 && shift < LIMB_BITS);
l = high;
for(i = n - 1; i >= 0; i--) {
a = tab[i];
tab_r[i] = (a >> shift) | (l << (LIMB_BITS - shift));
l = a;
}
return l & (((limb_t)1 << shift) - 1);
}
/* tabr[] = taba[] * b + l. Return the high carry */
static limb_t mp_mul1(limb_t *tabr, const limb_t *taba, limb_t n,
limb_t b, limb_t l)
{
limb_t i;
dlimb_t t;
for(i = 0; i < n; i++) {
t = (dlimb_t)taba[i] * (dlimb_t)b + l;