2020-09-06 16:53:08 +00:00
|
|
|
/*
|
|
|
|
* Tiny arbitrary precision floating point library
|
|
|
|
*
|
2021-03-27 10:17:31 +00:00
|
|
|
* Copyright (c) 2017-2021 Fabrice Bellard
|
2020-09-06 16:53:08 +00:00
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
* THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <inttypes.h>
|
|
|
|
#include <math.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
#ifdef __AVX2__
|
|
|
|
#include <immintrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "cutils.h"
|
|
|
|
#include "libbf.h"
|
|
|
|
|
|
|
|
/* enable it to check the multiplication result */
|
|
|
|
//#define USE_MUL_CHECK
|
|
|
|
/* enable it to use FFT/NTT multiplication */
|
|
|
|
#define USE_FFT_MUL
|
|
|
|
/* enable decimal floating point support */
|
|
|
|
#define USE_BF_DEC
|
|
|
|
|
|
|
|
//#define inline __attribute__((always_inline))
|
|
|
|
|
|
|
|
#ifdef __AVX2__
|
|
|
|
#define FFT_MUL_THRESHOLD 100 /* in limbs of the smallest factor */
|
|
|
|
#else
|
|
|
|
#define FFT_MUL_THRESHOLD 100 /* in limbs of the smallest factor */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* XXX: adjust */
|
|
|
|
#define DIVNORM_LARGE_THRESHOLD 50
|
|
|
|
#define UDIV1NORM_THRESHOLD 3
|
|
|
|
|
|
|
|
#if LIMB_BITS == 64
|
|
|
|
#define FMT_LIMB1 "%" PRIx64
|
|
|
|
#define FMT_LIMB "%016" PRIx64
|
|
|
|
#define PRId_LIMB PRId64
|
|
|
|
#define PRIu_LIMB PRIu64
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
#define FMT_LIMB1 "%x"
|
|
|
|
#define FMT_LIMB "%08x"
|
|
|
|
#define PRId_LIMB "d"
|
|
|
|
#define PRIu_LIMB "u"
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
typedef intptr_t mp_size_t;
|
|
|
|
|
|
|
|
typedef int bf_op2_func_t(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
|
|
|
|
bf_flags_t flags);
|
|
|
|
|
|
|
|
#ifdef USE_FFT_MUL
|
|
|
|
|
|
|
|
#define FFT_MUL_R_OVERLAP_A (1 << 0)
|
|
|
|
#define FFT_MUL_R_OVERLAP_B (1 << 1)
|
|
|
|
#define FFT_MUL_R_NORESIZE (1 << 2)
|
|
|
|
|
|
|
|
static no_inline int fft_mul(bf_context_t *s,
|
|
|
|
bf_t *res, limb_t *a_tab, limb_t a_len,
|
|
|
|
limb_t *b_tab, limb_t b_len, int mul_flags);
|
|
|
|
static void fft_clear_cache(bf_context_t *s);
|
|
|
|
#endif
|
|
|
|
#ifdef USE_BF_DEC
|
|
|
|
static limb_t get_digit(const limb_t *tab, limb_t len, slimb_t pos);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* could leading zeros */
|
|
|
|
static inline int clz(limb_t a)
|
|
|
|
{
|
|
|
|
if (a == 0) {
|
|
|
|
return LIMB_BITS;
|
|
|
|
} else {
|
|
|
|
#if LIMB_BITS == 64
|
|
|
|
return clz64(a);
|
|
|
|
#else
|
|
|
|
return clz32(a);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int ctz(limb_t a)
|
|
|
|
{
|
|
|
|
if (a == 0) {
|
|
|
|
return LIMB_BITS;
|
|
|
|
} else {
|
|
|
|
#if LIMB_BITS == 64
|
|
|
|
return ctz64(a);
|
|
|
|
#else
|
|
|
|
return ctz32(a);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int ceil_log2(limb_t a)
|
|
|
|
{
|
|
|
|
if (a <= 1)
|
|
|
|
return 0;
|
|
|
|
else
|
|
|
|
return LIMB_BITS - clz(a - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* b must be >= 1 */
|
|
|
|
static inline slimb_t ceil_div(slimb_t a, slimb_t b)
|
|
|
|
{
|
|
|
|
if (a >= 0)
|
|
|
|
return (a + b - 1) / b;
|
|
|
|
else
|
|
|
|
return a / b;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* b must be >= 1 */
|
|
|
|
static inline slimb_t floor_div(slimb_t a, slimb_t b)
|
|
|
|
{
|
|
|
|
if (a >= 0) {
|
|
|
|
return a / b;
|
|
|
|
} else {
|
|
|
|
return (a - b + 1) / b;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* return r = a modulo b (0 <= r <= b - 1. b must be >= 1 */
|
|
|
|
static inline limb_t smod(slimb_t a, slimb_t b)
|
|
|
|
{
|
|
|
|
a = a % (slimb_t)b;
|
|
|
|
if (a < 0)
|
|
|
|
a += b;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
2020-09-06 16:57:11 +00:00
|
|
|
/* signed addition with saturation */
|
|
|
|
static inline slimb_t sat_add(slimb_t a, slimb_t b)
|
|
|
|
{
|
|
|
|
slimb_t r;
|
|
|
|
r = a + b;
|
|
|
|
/* overflow ? */
|
|
|
|
if (((a ^ r) & (b ^ r)) < 0)
|
|
|
|
r = (a >> (LIMB_BITS - 1)) ^ (((limb_t)1 << (LIMB_BITS - 1)) - 1);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2020-09-06 16:53:08 +00:00
|
|
|
#define malloc(s) malloc_is_forbidden(s)
|
|
|
|
#define free(p) free_is_forbidden(p)
|
|
|
|
#define realloc(p, s) realloc_is_forbidden(p, s)
|
|
|
|
|
|
|
|
void bf_context_init(bf_context_t *s, bf_realloc_func_t *realloc_func,
|
|
|
|
void *realloc_opaque)
|
|
|
|
{
|
|
|
|
memset(s, 0, sizeof(*s));
|
|
|
|
s->realloc_func = realloc_func;
|
|
|
|
s->realloc_opaque = realloc_opaque;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bf_context_end(bf_context_t *s)
|
|
|
|
{
|
|
|
|
bf_clear_cache(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
void bf_init(bf_context_t *s, bf_t *r)
|
|
|
|
{
|
|
|
|
r->ctx = s;
|
|
|
|
r->sign = 0;
|
|
|
|
r->expn = BF_EXP_ZERO;
|
|
|
|
r->len = 0;
|
|
|
|
r->tab = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* return 0 if OK, -1 if alloc error */
|
|
|
|
int bf_resize(bf_t *r, limb_t len)
|
|
|
|
{
|
|
|
|
limb_t *tab;
|
|
|
|
|
|
|
|
if (len != r->len) {
|
|
|
|
tab = bf_realloc(r->ctx, r->tab, len * sizeof(limb_t));
|
|
|
|
if (!tab && len != 0)
|
|
|
|
return -1;
|
|
|
|
r->tab = tab;
|
|
|
|
r->len = len;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* return 0 or BF_ST_MEM_ERROR */
|
|
|
|
int bf_set_ui(bf_t *r, uint64_t a)
|
|
|
|
{
|
|
|
|
r->sign = 0;
|
|
|
|
if (a == 0) {
|
|
|
|
r->expn = BF_EXP_ZERO;
|
|
|
|
bf_resize(r, 0); /* cannot fail */
|
|
|
|
}
|
|
|
|
#if LIMB_BITS == 32
|
|
|
|
else if (a <= 0xffffffff)
|
|
|
|
#else
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
int shift;
|
|
|
|
if (bf_resize(r, 1))
|
|
|
|
goto fail;
|
|
|
|
shift = clz(a);
|
|
|
|
r->tab[0] = a << shift;
|
|
|
|
r->expn = LIMB_BITS - shift;
|
|
|
|
}
|
|
|
|
#if LIMB_BITS == 32
|
|
|
|
else {
|
|
|
|
uint32_t a1, a0;
|
|
|
|
int shift;
|
|
|
|
if (bf_resize(r, 2))
|
|
|
|
goto fail;
|
|
|
|
a0 = a;
|
|
|
|
a1 = a >> 32;
|
|
|
|
shift = clz(a1);
|
|
|
|
r->tab[0] = a0 << shift;
|
|
|
|
r->tab[1] = (a1 << shift) | (a0 >> (LIMB_BITS - shift));
|
|
|
|
r->expn = 2 * LIMB_BITS - shift;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return 0;
|
|
|
|
fail:
|
|
|
|
bf_set_nan(r);
|
|
|
|
return BF_ST_MEM_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* return 0 or BF_ST_MEM_ERROR */
|
|
|
|
int bf_set_si(bf_t *r, int64_t a)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (a < 0) {
|
|
|
|
ret = bf_set_ui(r, -a);
|
|
|
|
r->sign = 1;
|
|
|
|
} else {
|
|
|
|
ret = bf_set_ui(r, a);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bf_set_nan(bf_t *r)
|
|
|
|
{
|
|
|
|
bf_resize(r, 0); /* cannot fail */
|
|
|
|
r->expn = BF_EXP_NAN;
|
|
|
|
r->sign = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bf_set_zero(bf_t *r, int is_neg)
|
|
|
|
{
|
|
|
|
bf_resize(r, 0); /* cannot fail */
|
|
|
|
r->expn = BF_EXP_ZERO;
|
|
|
|
r->sign = is_neg;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bf_set_inf(bf_t *r, int is_neg)
|
|
|
|
{
|
|
|
|
bf_resize(r, 0); /* cannot fail */
|
|
|
|
r->expn = BF_EXP_INF;
|
|
|
|
r->sign = is_neg;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* return 0 or BF_ST_MEM_ERROR */
|
|
|
|
int bf_set(bf_t *r, const bf_t *a)
|
|
|
|
{
|
|
|
|
if (r == a)
|
|
|
|
return 0;
|
|
|
|
if (bf_resize(r, a->len)) {
|
|
|
|
bf_set_nan(r);
|
|
|
|
return BF_ST_MEM_ERROR;
|
|
|
|
}
|
|
|
|
r->sign = a->sign;
|
|
|
|
r->expn = a->expn;
|
|
|
|
memcpy(r->tab, a->tab, a->len * sizeof(limb_t));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* equivalent to bf_set(r, a); bf_delete(a) */
|
|
|
|
void bf_move(bf_t *r, bf_t *a)
|
|
|
|
{
|
|
|
|
bf_context_t *s = r->ctx;
|
|
|
|
if (r == a)
|
|
|
|
return;
|
|
|
|
bf_free(s, r->tab);
|
|
|
|
*r = *a;
|
|
|
|
}
|
|
|
|
|
|
|
|
static limb_t get_limbz(const bf_t *a, limb_t idx)
|
|
|
|
{
|
|
|
|
if (idx >= a->len)
|
|
|
|
return 0;
|
|
|
|
else
|
|
|
|
return a->tab[idx];
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get LIMB_BITS at bit position 'pos' in tab */
|
|
|
|
static inline limb_t get_bits(const limb_t *tab, limb_t len, slimb_t pos)
|
|
|
|
{
|
|
|
|
limb_t i, a0, a1;
|
|
|
|
int p;
|
|
|
|
|
|
|
|
i = pos >> LIMB_LOG2_BITS;
|
|
|
|
p = pos & (LIMB_BITS - 1);
|
|
|
|
if (i < len)
|
|
|
|
a0 = tab[i];
|
|
|
|
else
|
|
|
|
a0 = 0;
|
|
|
|
if (p == 0) {
|
|
|
|
return a0;
|
|
|
|
} else {
|
|
|
|
i++;
|
|
|
|
if (i < len)
|
|
|
|
a1 = tab[i];
|
|
|
|
else
|
|
|
|
a1 = 0;
|
|
|
|
return (a0 >> p) | (a1 << (LIMB_BITS - p));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline limb_t get_bit(const limb_t *tab, limb_t len, slimb_t pos)
|
|
|
|
{
|
|
|
|
slimb_t i;
|
|
|
|
i = pos >> LIMB_LOG2_BITS;
|
|
|
|
if (i < 0 || i >= len)
|
|
|
|
return 0;
|
|
|
|
return (tab[i] >> (pos & (LIMB_BITS - 1))) & 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline limb_t limb_mask(int start, int last)
|
|
|
|
{
|
|
|
|
limb_t v;
|
|
|
|
int n;
|
|
|
|
n = last - start + 1;
|
|
|
|
if (n == LIMB_BITS)
|
|
|
|
v = -1;
|
|
|
|
else
|
|
|
|
v = (((limb_t)1 << n) - 1) << start;
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
static limb_t mp_scan_nz(const limb_t *tab, mp_size_t n)
|
|
|
|
{
|
|
|
|
mp_size_t i;
|
|
|
|
for(i = 0; i < n; i++) {
|
|
|
|
if (tab[i] != 0)
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* return != 0 if one bit between 0 and bit_pos inclusive is not zero. */
|
|
|
|
static inline limb_t scan_bit_nz(const bf_t *r, slimb_t bit_pos)
|
|
|
|
{
|
|
|
|
slimb_t pos;
|
|
|
|
limb_t v;
|
|
|
|
|
|
|
|
pos = bit_pos >> LIMB_LOG2_BITS;
|
|
|
|
if (pos < 0)
|
|
|
|
return 0;
|
|
|
|
v = r->tab[pos] & limb_mask(0, bit_pos & (LIMB_BITS - 1));
|
|
|
|
if (v != 0)
|
|
|
|
return 1;
|
|
|
|
pos--;
|
|
|
|
while (pos >= 0) {
|
|
|
|
if (r->tab[pos] != 0)
|
|
|
|
return 1;
|
|
|
|
pos--;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* return the addend for rounding. Note that prec can be <= 0 (for
|
|
|
|
BF_FLAG_RADPNT_PREC) */
|
|
|
|
static int bf_get_rnd_add(int *pret, const bf_t *r, limb_t l,
|
|
|
|
slimb_t prec, int rnd_mode)
|
|
|
|
{
|
|
|
|
int add_one, inexact;
|
|
|
|
limb_t bit1, bit0;
|
|
|
|
|
|
|
|
if (rnd_mode == BF_RNDF) {
|
|
|
|
bit0 = 1; /* faithful rounding does not honor the INEXACT flag */
|
|
|
|
} else {
|
|
|
|
/* starting limb for bit 'prec + 1' */
|
|
|
|
bit0 = scan_bit_nz(r, l * LIMB_BITS - 1 - bf_max(0, prec + 1));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get the bit at 'prec' */
|
|
|
|
bit1 = get_bit(r->tab, l, l * LIMB_BITS - 1 - prec);
|
|
|
|
inexact = (bit1 | bit0) != 0;
|
|
|
|
|
|
|
|
add_one = 0;
|
|
|
|
switch(rnd_mode) {
|
|
|
|
case BF_RNDZ:
|
|
|
|
break;
|
|
|
|
case BF_RNDN:
|
|
|
|
if (bit1) {
|
|
|
|
if (bit0) {
|
|
|
|
add_one = 1;
|
|
|
|
} else {
|
|
|
|
/* round to even */
|
|
|
|
add_one =
|
|
|
|
get_bit(r->tab, l, l * LIMB_BITS - 1 - (prec - 1));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case BF_RNDD:
|
|
|
|
case BF_RNDU:
|
|
|
|
if (r->sign == (rnd_mode == BF_RNDD))
|
|
|
|
add_one = inexact;
|
|
|
|
break;
|
2020-09-06 16:57:11 +00:00
|
|
|
case BF_RNDA:
|
|
|
|
add_one = inexact;
|
|
|
|
break;
|
2020-09-06 16:53:08 +00:00
|
|
|
case BF_RNDNA:
|
|
|
|
case BF_RNDF:
|
|
|
|
add_one = bit1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (inexact)
|
|
|
|
*pret |= BF_ST_INEXACT;
|
|
|
|
return add_one;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int bf_set_overflow(bf_t *r, int sign, limb_t prec, bf_flags_t flags)
|
|
|
|
{
|
|
|
|
slimb_t i, l, e_max;
|
|
|
|
int rnd_mode;
|
|
|
|
|
|
|
|
rnd_mode = flags & BF_RND_MASK;
|
|
|
|
if (prec == BF_PREC_INF ||
|
|
|
|
rnd_mode == BF_RNDN ||
|
|
|
|
rnd_mode == BF_RNDNA ||
|
2020-09-06 16:57:11 +00:00
|
|
|
rnd_mode == BF_RNDA ||
|
2020-09-06 16:53:08 +00:00
|
|
|
(rnd_mode == BF_RNDD && sign == 1) ||
|
|
|
|
(rnd_mode == BF_RNDU && sign == 0)) {
|
|
|
|
bf_set_inf(r, sign);
|
|
|
|
} else {
|
|
|
|
/* set to maximum finite number */
|
|
|
|
l = (prec + LIMB_BITS - 1) / LIMB_BITS;
|
|
|
|
if (bf_resize(r, l)) {
|
|
|
|
bf_set_nan(r);
|
|
|
|
return BF_ST_MEM_ERROR;
|
|
|
|
}
|
|
|
|
r->tab[0] = limb_mask((-prec) & (LIMB_BITS - 1),
|
|
|
|
LIMB_BITS - 1);
|
|
|
|
for(i = 1; i < l; i++)
|
|
|
|
r->tab[i] = (limb_t)-1;
|
|
|
|
e_max = (limb_t)1 << (bf_get_exp_bits(flags) - 1);
|
|
|
|
r->expn = e_max;
|
|
|
|
r->sign = sign;
|
|
|
|
}
|
|
|
|
return BF_ST_OVERFLOW | BF_ST_INEXACT;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* round to prec1 bits assuming 'r' is non zero and finite. 'r' is
|
|
|
|
assumed to have length 'l' (1 <= l <= r->len). Note: 'prec1' can be
|
2020-09-06 16:57:11 +00:00
|
|
|
infinite (BF_PREC_INF). 'ret' is 0 or BF_ST_INEXACT if the result
|
|
|
|
is known to be inexact. Can fail with BF_ST_MEM_ERROR in case of
|
2020-09-06 16:53:08 +00:00
|
|
|
overflow not returning infinity. */
|
2020-09-06 16:57:11 +00:00
|
|
|
static int __bf_round(bf_t *r, limb_t prec1, bf_flags_t flags, limb_t l,
|
|
|
|
int ret)
|
2020-09-06 16:53:08 +00:00
|
|
|
{
|
|
|
|
limb_t v, a;
|
2020-09-06 16:57:11 +00:00
|
|
|
int shift, add_one, rnd_mode;
|
2020-09-06 16:53:08 +00:00
|
|
|
slimb_t i, bit_pos, pos, e_min, e_max, e_range, prec;
|
|
|
|
|
|
|
|
/* e_min and e_max are computed to match the IEEE 754 conventions */
|
|
|
|
e_range = (limb_t)1 << (bf_get_exp_bits(flags) - 1);
|
|
|
|
e_min = -e_range + 3;
|
|
|
|
e_max = e_range;
|
|
|
|
|
|
|
|
if (flags & BF_FLAG_RADPNT_PREC) {
|
|
|
|
/* 'prec' is the precision after the radix point */
|
|
|
|
if (prec1 != BF_PREC_INF)
|
|
|
|
prec = r->expn + prec1;
|
|
|
|
else
|
|
|
|
prec = prec1;
|
|
|
|
} else if (unlikely(r->expn < e_min) && (flags & BF_FLAG_SUBNORMAL)) {
|
|
|
|
/* restrict the precision in case of potentially subnormal
|
|
|
|
result */
|
|
|
|
assert(prec1 != BF_PREC_INF);
|
|
|
|
prec = prec1 - (e_min - r->expn);
|
|
|
|
} else {
|
|
|
|
prec = prec1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* round to prec bits */
|
|
|
|
rnd_mode = flags & BF_RND_MASK;
|
|
|
|
add_one = bf_get_rnd_add(&ret, r, l, prec, rnd_mode);
|
|
|
|
|
|
|
|
if (prec <= 0) {
|
|
|
|
if (add_one) {
|
|
|
|
bf_resize(r, 1); /* cannot fail */
|
|
|
|
r->tab[0] = (limb_t)1 << (LIMB_BITS - 1);
|
|
|
|
r->expn += 1 - prec;
|
|
|
|
ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT;
|
|
|
|
return ret;
|
|
|
|
} else {
|
|
|
|
goto underflow;
|
|
|
|
}
|
|
|
|
} else if (add_one) {
|
|
|
|
limb_t carry;
|
|
|
|
|
|
|
|
/* add one starting at digit 'prec - 1' */
|
|
|
|
bit_pos = l * LIMB_BITS - 1 - (prec - 1);
|
|
|
|
pos = bit_pos >> LIMB_LOG2_BITS;
|
|
|
|
carry = (limb_t)1 << (bit_pos & (LIMB_BITS - 1));
|
|
|
|
|
|
|
|
for(i = pos; i < l; i++) {
|
|
|
|
v = r->tab[i] + carry;
|
|
|
|
carry = (v < carry);
|
|
|
|
r->tab[i] = v;
|
|
|
|
if (carry == 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (carry) {
|
|
|
|
/* shift right by one digit */
|
|
|
|
v = 1;
|
|
|
|
for(i = l - 1; i >= pos; i--) {
|
|
|
|
a = r->tab[i];
|
|
|
|
r->tab[i] = (a >> 1) | (v << (LIMB_BITS - 1));
|
|
|
|
v = a;
|
|
|
|
}
|
|
|
|
r->expn++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check underflow */
|
|
|
|
if (unlikely(r->expn < e_min)) {
|
|
|
|
if (flags & BF_FLAG_SUBNORMAL) {
|
|
|
|
/* if inexact, also set the underflow flag */
|
|
|
|
if (ret & BF_ST_INEXACT)
|
|
|
|
ret |= BF_ST_UNDERFLOW;
|
|
|
|
} else {
|
|
|
|
underflow:
|
|
|
|
ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT;
|
|
|
|
bf_set_zero(r, r->sign);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check overflow */
|
|
|
|
if (unlikely(r->expn > e_max))
|
|
|
|
return bf_set_overflow(r, r->sign, prec1, flags);
|
|
|
|
|
|
|
|
/* keep the bits starting at 'prec - 1' */
|
|
|
|
bit_pos = l * LIMB_BITS - 1 - (prec - 1);
|
|
|
|
i = bit_pos >> LIMB_LOG2_BITS;
|
|
|
|
if (i >= 0) {
|
|
|
|
shift = bit_pos & (LIMB_BITS - 1);
|
|
|
|
if (shift != 0)
|
|
|
|
r->tab[i] &= limb_mask(shift, LIMB_BITS - 1);
|
|
|
|
} else {
|
|
|
|
i = 0;
|
|
|
|
}
|
|
|
|
/* remove trailing zeros */
|
|
|
|
while (r->tab[i] == 0)
|
|
|
|
i++;
|
|
|
|
if (i > 0) {
|
|
|
|
l -= i;
|
|
|
|
memmove(r->tab, r->tab + i, l * sizeof(limb_t));
|
|
|
|
}
|
|
|
|
bf_resize(r, l); /* cannot fail */
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 'r' must be a finite number. */
|
|
|
|
int bf_normalize_and_round(bf_t *r, limb_t prec1, bf_flags_t flags)
|
|
|
|
{
|
|
|
|
limb_t l, v, a;
|
|
|
|
int shift, ret;
|
|
|
|
slimb_t i;
|
|
|
|
|
|
|
|
// bf_print_str("bf_renorm", r);
|
|
|
|
l = r->len;
|
|
|
|
while (l > 0 && r->tab[l - 1] == 0)
|
|
|
|
l--;
|
|
|
|
if (l == 0) {
|
|
|
|
/* zero */
|
|
|
|
r->expn = BF_EXP_ZERO;
|
|
|
|
bf_resize(r, 0); /* cannot fail */
|
|
|
|
ret = 0;
|
|
|
|
} else {
|
|
|
|
r->expn -= (r->len - l) * LIMB_BITS;
|
|
|
|
/* shift to have the MSB set to '1' */
|
|
|
|
v = r->tab[l - 1];
|
|
|
|
shift = clz(v);
|
|
|
|
if (shift != 0) {
|
|
|
|
v = 0;
|
|
|
|
for(i = 0; i < l; i++) {
|
|
|
|
a = r->tab[i];
|
|
|
|
r->tab[i] = (a << shift) | (v >> (LIMB_BITS - shift));
|
|
|
|
v = a;
|
|
|
|
}
|
|
|
|
r->expn -= shift;
|
|
|
|
}
|
2020-09-06 16:57:11 +00:00
|
|
|
ret = __bf_round(r, prec1, flags, l, 0);
|
2020-09-06 16:53:08 +00:00
|
|
|
}
|
|
|
|
// bf_print_str("r_final", r);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* return true if rounding can be done at precision 'prec' assuming
|
|
|
|
the exact result r is such that |r-a| <= 2^(EXP(a)-k). */
|
|
|
|
/* XXX: check the case where the exponent would be incremented by the
|
|
|
|
rounding */
|
|
|
|
int bf_can_round(const bf_t *a, slimb_t prec, bf_rnd_t rnd_mode, slimb_t k)
|
|
|
|
{
|
|
|
|
BOOL is_rndn;
|
|
|
|
slimb_t bit_pos, n;
|
|
|
|
limb_t bit;
|
|
|
|
|
|
|
|
if (a->expn == BF_EXP_INF || a->expn == BF_EXP_NAN)
|
|
|
|
return FALSE;
|
|
|
|
if (rnd_mode == BF_RNDF) {
|
|
|
|
return (k >= (prec + 1));
|
|
|
|
}
|
|
|
|
if (a->expn == BF_EXP_ZERO)
|
|
|
|
return FALSE;
|
2020-09-06 16:57:11 +00:00
|
|
|
is_rndn = (rnd_mode == BF_RNDN || rnd_mode == BF_RNDNA);
|
2020-09-06 16:53:08 +00:00
|
|
|
if (k < (prec + 2))
|
|
|
|
return FALSE;
|
|
|
|
bit_pos = a->len * LIMB_BITS - 1 - prec;
|
|
|
|
n = k - prec;
|
|
|
|
/* bit pattern for RNDN or RNDNA: 0111.. or 1000...
|
|
|
|
for other rounding modes: 000... or 111...
|
|
|
|
*/
|
|
|
|
bit = get_bit(a->tab, a->len, bit_pos);
|
|
|
|
bit_pos--;
|
|
|
|
n--;
|
|
|
|
bit ^= is_rndn;
|
|
|
|
/* XXX: slow, but a few iterations on average */
|
|
|
|
while (n != 0) {
|
|
|
|
if (get_bit(a->tab, a->len, bit_pos) != bit)
|
|
|
|
return TRUE;
|
|
|
|
bit_pos--;
|
|
|
|
n--;
|
|
|
|
}
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Cannot fail with BF_ST_MEM_ERROR. */
|
|
|
|
int bf_round(bf_t *r, limb_t prec, bf_flags_t flags)
|
|
|
|
{
|
|
|
|
if (r->len == 0)
|
|
|
|
return 0;
|
2020-09-06 16:57:11 +00:00
|
|
|
return __bf_round(r, prec, flags, r->len, 0);
|
2020-09-06 16:53:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* for debugging */
|
|
|
|
static __maybe_unused void dump_limbs(const char *str, const limb_t *tab, limb_t n)
|
|
|
|
{
|
|
|
|
limb_t i;
|
|
|
|
printf("%s: len=%" PRId_LIMB "\n", str, n);
|
|
|
|
for(i = 0; i < n; i++) {
|
|
|
|
printf("%" PRId_LIMB ": " FMT_LIMB "\n",
|
|
|
|
i, tab[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void mp_print_str(const char *str, const limb_t *tab, limb_t n)
|
|
|
|
{
|
|
|
|
slimb_t i;
|
|
|
|
printf("%s= 0x", str);
|
|
|
|
for(i = n - 1; i >= 0; i--) {
|
|
|
|
if (i != (n - 1))
|
|
|
|
printf("_");
|
|
|
|
printf(FMT_LIMB, tab[i]);
|
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
static __maybe_unused void mp_print_str_h(const char *str,
|
|
|
|
const limb_t *tab, limb_t n,
|
|
|
|
limb_t high)
|
|
|
|
{
|
|
|
|
slimb_t i;
|
|
|
|
printf("%s= 0x", str);
|
|
|
|
printf(FMT_LIMB, high);
|
|
|
|
for(i = n - 1; i >= 0; i--) {
|
|
|
|
printf("_");
|
|
|
|
printf(FMT_LIMB, tab[i]);
|
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* for debugging */
|
|
|
|
void bf_print_str(const char *str, const bf_t *a)
|
|
|
|
{
|
|
|
|
slimb_t i;
|
|
|
|
printf("%s=", str);
|
|
|
|
|
|
|
|
if (a->expn == BF_EXP_NAN) {
|
|
|
|
printf("NaN");
|
|
|
|
} else {
|
|
|
|
if (a->sign)
|
|
|
|
putchar('-');
|
|
|
|
if (a->expn == BF_EXP_ZERO) {
|
|
|
|
putchar('0');
|
|
|
|
} else if (a->expn == BF_EXP_INF) {
|
|
|
|
printf("Inf");
|
|
|
|
} else {
|
|
|
|
printf("0x0.");
|
|
|
|
for(i = a->len - 1; i >= 0; i--)
|
|
|
|
printf(FMT_LIMB, a->tab[i]);
|
|
|
|
printf("p%" PRId_LIMB, a->expn);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* compare the absolute value of 'a' and 'b'. Return < 0 if a < b, 0
|
|
|
|
if a = b and > 0 otherwise. */
|
|
|
|
int bf_cmpu(const bf_t *a, const bf_t *b)
|
|
|
|
{
|
|
|
|
slimb_t i;
|
|
|
|
limb_t len, v1, v2;
|
|
|
|
|
|
|
|
if (a->expn != b->expn) {
|
|
|
|
if (a->expn < b->expn)
|
|
|
|
return -1;
|
|
|
|
else
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
len = bf_max(a->len, b->len);
|
|
|
|
for(i = len - 1; i >= 0; i--) {
|
|
|
|
v1 = get_limbz(a, a->len - len + i);
|
|
|
|
v2 = get_limbz(b, b->len - len + i);
|
|
|
|
if (v1 != v2) {
|
|
|
|
if (v1 < v2)
|
|
|
|
return -1;
|
|
|
|
else
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Full order: -0 < 0, NaN == NaN and NaN is larger than all other numbers */
|
|
|
|
int bf_cmp_full(const bf_t *a, const bf_t *b)
|
|
|
|
{
|
|
|
|
int res;
|
|
|
|
|
|
|
|
if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) {
|
|
|
|
if (a->expn == b->expn)
|
|
|
|
res = 0;
|
|
|
|
else if (a->expn == BF_EXP_NAN)
|
|
|
|
res = 1;
|
|
|
|
else
|
|
|
|
res = -1;
|
|
|
|
} else if (a->sign != b->sign) {
|
|
|
|
res = 1 - 2 * a->sign;
|
|
|
|
} else {
|
|
|
|
res = bf_cmpu(a, b);
|
|
|
|
if (a->sign)
|
|
|
|
res = -res;
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2020-09-06 16:57:11 +00:00
|
|
|
/* Standard floating point comparison: return 2 if one of the operands
|
|
|
|
is NaN (unordered) or -1, 0, 1 depending on the ordering assuming
|
|
|
|
-0 == +0 */
|
|
|
|
int bf_cmp(const bf_t *a, const bf_t *b)
|
2020-09-06 16:53:08 +00:00
|
|
|
{
|
|
|
|
int res;
|
|
|
|
|
2020-09-06 16:57:11 +00:00
|
|
|
if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) {
|
|
|
|
res = 2;
|
|
|
|
} else if (a->sign != b->sign) {
|
|
|
|
if (a->expn == BF_EXP_ZERO && b->expn == BF_EXP_ZERO)
|
|
|
|
res = 0;
|
|
|
|
else
|
|
|
|
res = 1 - 2 * a->sign;
|
2020-09-06 16:53:08 +00:00
|
|
|
} else {
|
|
|
|
res = bf_cmpu(a, b);
|
2020-09-06 16:57:11 +00:00
|
|
|
if (a->sign)
|
|
|
|
res = -res;
|
2020-09-06 16:53:08 +00:00
|
|
|
}
|
2020-09-06 16:57:11 +00:00
|
|
|
return res;
|
2020-09-06 16:53:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Compute the number of bits 'n' matching the pattern:
|
|
|
|
a= X1000..0
|
|
|
|
b= X0111..1
|
|
|
|
|
|
|
|
When computing a-b, the result will have at least n leading zero
|
|
|
|
bits.
|
|
|
|
|
|
|
|
Precondition: a > b and a.expn - b.expn = 0 or 1
|
|
|
|
*/
|
|
|
|
static limb_t count_cancelled_bits(const bf_t *a, const bf_t *b)
|
|
|
|
{
|
|
|
|
slimb_t bit_offset, b_offset, n;
|
|
|
|
int p, p1;
|
|
|
|
limb_t v1, v2, mask;
|
|
|
|
|
|
|
|
bit_offset = a->len * LIMB_BITS - 1;
|
|
|
|
b_offset = (b->len - a->len) * LIMB_BITS - (LIMB_BITS - 1) +
|
|
|
|
a->expn - b->expn;
|
|
|
|
n = 0;
|
|
|
|
|
|
|
|
/* first search the equals bits */
|
|
|
|
for(;;) {
|
|
|
|
v1 = get_limbz(a, bit_offset >> LIMB_LOG2_BITS);
|
|
|
|
v2 = get_bits(b->tab, b->len, bit_offset + b_offset);
|
|
|
|
// printf("v1=" FMT_LIMB " v2=" FMT_LIMB "\n", v1, v2);
|
|
|
|
if (v1 != v2)
|
|
|
|
break;
|
|
|
|
n += LIMB_BITS;
|
|
|
|
bit_offset -= LIMB_BITS;
|
|
|
|
}
|
|
|
|
/* find the position of the first different bit */
|
|
|
|
p = clz(v1 ^ v2) + 1;
|
|
|
|
n += p;
|
|
|
|
/* then search for '0' in a and '1' in b */
|
|
|
|
p = LIMB_BITS - p;
|
|
|
|
if (p > 0) {
|
|
|
|
/* search in the trailing p bits of v1 and v2 */
|
|
|
|
mask = limb_mask(0, p - 1);
|
|
|
|
p1 = bf_min(clz(v1 & mask), clz((~v2) & mask)) - (LIMB_BITS - p);
|
|
|
|
n += p1;
|
|
|
|
if (p1 != p)
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
bit_offset -= LIMB_BITS;
|
|
|
|
for(;;) {
|
|
|
|
v1 = get_limbz(a, bit_offset >> LIMB_LOG2_BITS);
|
|
|
|
v2 = get_bits(b->tab, b->len, bit_offset + b_offset);
|
|
|
|
// printf("v1=" FMT_LIMB " v2=" FMT_LIMB "\n", v1, v2);
|
|
|
|
if (v1 != 0 || v2 != -1) {
|
|
|
|
/* different: count the matching bits */
|
|
|
|
p1 = bf_min(clz(v1), clz(~v2));
|
|
|
|
n += p1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
n += LIMB_BITS;
|
|
|
|
bit_offset -= LIMB_BITS;
|
|
|
|
}
|
|
|
|
done:
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int bf_add_internal(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
|
|
|
|
bf_flags_t flags, int b_neg)
|
|
|
|
{
|
|
|
|
const bf_t *tmp;
|
|
|
|
int is_sub, ret, cmp_res, a_sign, b_sign;
|
|
|
|
|
|
|
|
a_sign = a->sign;
|
|
|
|
b_sign = b->sign ^ b_neg;
|
|
|
|
is_sub = a_sign ^ b_sign;
|
|
|
|
cmp_res = bf_cmpu(a, b);
|
|
|
|
if (cmp_res < 0) {
|
|
|
|
tmp = a;
|
|
|
|
a = b;
|
|
|
|
b = tmp;
|
|
|
|
a_sign = b_sign; /* b_sign is never used later */
|
|
|
|
}
|
|
|
|
/* abs(a) >= abs(b) */
|
|
|
|
if (cmp_res == 0 && is_sub && a->expn < BF_EXP_INF) {
|
|
|
|
/* zero result */
|
|
|
|
bf_set_zero(r, (flags & BF_RND_MASK) == BF_RNDD);
|
|
|
|
ret = 0;
|
|
|
|
} else if (a->len == 0 || b->len == 0) {
|
|
|
|
ret = 0;
|
|
|
|
if (a->expn >= BF_EXP_INF) {
|
|
|
|
if (a->expn == BF_EXP_NAN) {
|
|
|
|
/* at least one operand is NaN */
|
|
|
|
bf_set_nan(r);
|
|
|
|
} else if (b->expn == BF_EXP_INF && is_sub) {
|
|
|
|
/* infinities with different signs */
|
|
|
|
bf_set_nan(r);
|
|
|
|
ret = BF_ST_INVALID_OP;
|
|
|
|
} else {
|
|
|
|
bf_set_inf(r, a_sign);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* at least one zero and not subtract */
|
|
|
|
bf_set(r, a);
|
|
|
|
r->sign = a_sign;
|
|
|
|
goto renorm;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
slimb_t d, a_offset, b_bit_offset, i, cancelled_bits;
|
|
|
|
limb_t carry, v1, v2, u, r_len, carry1, precl, tot_len, z, sub_mask;
|
|
|
|
|
|
|
|
r->sign = a_sign;
|
|
|
|
r->expn = a->expn;
|
|
|
|
d = a->expn - b->expn;
|
|
|
|
/* must add more precision for the leading cancelled bits in
|
|
|
|
subtraction */
|
|
|
|
if (is_sub) {
|
|
|
|
if (d <= 1)
|
|
|
|
cancelled_bits = count_cancelled_bits(a, b);
|
|
|
|
else
|
|
|
|
cancelled_bits = 1;
|
|
|
|
} else {
|
|
|
|
cancelled_bits = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* add two extra bits for rounding */
|
|
|
|
precl = (cancelled_bits + prec + 2 + LIMB_BITS - 1) / LIMB_BITS;
|
|
|
|
tot_len = bf_max(a->len, b->len + (d + LIMB_BITS - 1) / LIMB_BITS);
|
|
|
|
r_len = bf_min(precl, tot_len);
|
|
|
|
if (bf_resize(r, r_len))
|
|
|
|
goto fail;
|
|
|
|
a_offset = a->len - r_len;
|
|
|
|
b_bit_offset = (b->len - r_len) * LIMB_BITS + d;
|
|
|
|
|
|
|
|
/* compute the bits before for the rounding */
|
|
|
|
carry = is_sub;
|
|
|
|
z = 0;
|
|
|
|
sub_mask = -is_sub;
|
|
|
|
i = r_len - tot_len;
|
|
|
|
while (i < 0) {
|
|
|
|
slimb_t ap, bp;
|
|
|
|
BOOL inflag;
|
|
|
|
|
|
|
|
ap = a_offset + i;
|
|
|
|
bp = b_bit_offset + i * LIMB_BITS;
|
|
|
|
inflag = FALSE;
|
|
|
|
if (ap >= 0 && ap < a->len) {
|
|
|
|
v1 = a->tab[ap];
|
|
|
|
inflag = TRUE;
|
|
|
|
} else {
|
|
|
|
v1 = 0;
|
|
|
|
}
|
|
|
|
if (bp + LIMB_BITS > 0 && bp < (slimb_t)(b->len * LIMB_BITS)) {
|
|
|
|
v2 = get_bits(b->tab, b->len, bp);
|
|
|
|
inflag = TRUE;
|
|
|
|
} else {
|
|
|
|
v2 = 0;
|
|
|
|
}
|
|
|
|
if (!inflag) {
|
|
|
|
/* outside 'a' and 'b': go directly to the next value
|
|
|
|
inside a or b so that the running time does not
|
|
|
|
depend on the exponent difference */
|
|
|
|
i = 0;
|
|
|
|
if (ap < 0)
|
|
|
|
i = bf_min(i, -a_offset);
|
|
|
|
/* b_bit_offset + i * LIMB_BITS + LIMB_BITS >= 1
|
|
|
|
equivalent to
|
|
|
|
i >= ceil(-b_bit_offset + 1 - LIMB_BITS) / LIMB_BITS)
|
|
|
|
*/
|
|
|
|
if (bp + LIMB_BITS <= 0)
|
|
|
|
i = bf_min(i, (-b_bit_offset) >> LIMB_LOG2_BITS);
|
|
|
|
} else {
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
v2 ^= sub_mask;
|
|
|
|
u = v1 + v2;
|
|
|
|
carry1 = u < v1;
|
|
|
|
u += carry;
|
|
|
|
carry = (u < carry) | carry1;
|
|
|
|
z |= u;
|
|
|
|
}
|
|
|
|
/* and the result */
|
|
|
|
for(i = 0; i < r_len; i++) {
|
|
|
|
v1 = get_limbz(a, a_offset + i);
|
|
|
|
v2 = get_bits(b->tab, b->len, b_bit_offset + i * LIMB_BITS);
|
|
|
|
v2 ^= sub_mask;
|
|
|
|
u = v1 + v2;
|
|
|
|
carry1 = u < v1;
|
|
|
|
u += carry;
|
|
|
|
carry = (u < carry) | carry1;
|
|
|
|
r->tab[i] = u;
|
|
|
|
}
|
|
|
|
/* set the extra bits for the rounding */
|
|
|
|
r->tab[0] |= (z != 0);
|
|
|
|
|
|
|
|
/* carry is only possible in add case */
|
|
|
|
if (!is_sub && carry) {
|
|
|
|
if (bf_resize(r, r_len + 1))
|
|
|
|
goto fail;
|
|
|
|
r->tab[r_len] = 1;
|
|
|
|
r->expn += LIMB_BITS;
|
|
|
|
}
|
|
|
|
renorm:
|
|
|
|
ret = bf_normalize_and_round(r, prec, flags);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
fail:
|
|
|
|
bf_set_nan(r);
|
|
|
|
return BF_ST_MEM_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __bf_add(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
|
|
|
|
bf_flags_t flags)
|
|
|
|
{
|
|
|
|
return bf_add_internal(r, a, b, prec, flags, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __bf_sub(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
|
|
|
|
bf_flags_t flags)
|
|
|
|
{
|
|
|
|
return bf_add_internal(r, a, b, prec, flags, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
limb_t mp_add(limb_t *res, const limb_t *op1, const limb_t *op2,
|
|
|
|
limb_t n, limb_t carry)
|
|
|
|
{
|
|
|
|
slimb_t i;
|
|
|
|
limb_t k, a, v, k1;
|
|
|
|
|
|
|
|
k = carry;
|
|
|
|
for(i=0;i<n;i++) {
|
|
|
|
v = op1[i];
|
|
|
|
a = v + op2[i];
|
|
|
|
k1 = a < v;
|
|
|
|
a = a + k;
|
|
|
|
k = (a < k) | k1;
|
|
|
|
res[i] = a;
|
|
|
|
}
|
|
|
|
return k;
|
|
|
|
}
|
|
|
|
|
|
|
|
limb_t mp_add_ui(limb_t *tab, limb_t b, size_t n)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
limb_t k, a;
|
|
|
|
|
|
|
|
k=b;
|
|
|
|
for(i=0;i<n;i++) {
|
|
|
|
if (k == 0)
|
|
|
|
break;
|
|
|
|
a = tab[i] + k;
|
|
|
|
k = (a < k);
|
|
|
|
tab[i] = a;
|
|
|
|
}
|
|
|
|
return k;
|
|
|
|
}
|
|
|
|
|
|
|
|
limb_t mp_sub(limb_t *res, const limb_t *op1, const limb_t *op2,
|
|
|
|
mp_size_t n, limb_t carry)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
limb_t k, a, v, k1;
|
|
|
|
|
|
|
|
k = carry;
|
|
|
|
for(i=0;i<n;i++) {
|
|
|
|
v = op1[i];
|
|
|
|
a = v - op2[i];
|
|
|
|
k1 = a > v;
|
|
|
|
v = a - k;
|
|
|
|
k = (v > a) | k1;
|
|
|
|
res[i] = v;
|
|
|
|
}
|
|
|
|
return k;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* compute 0 - op2 */
|
|
|
|
static limb_t mp_neg(limb_t *res, const limb_t *op2, mp_size_t n, limb_t carry)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
limb_t k, a, v, k1;
|
|
|
|
|
|
|
|
k = carry;
|
|
|
|
for(i=0;i<n;i++) {
|
|
|
|
v = 0;
|
|
|
|
a = v - op2[i];
|
|
|
|
k1 = a > v;
|
|
|
|
v = a - k;
|
|
|
|
k = (v > a) | k1;
|
|
|
|
res[i] = v;
|
|
|
|
}
|
|
|
|
return k;
|
|
|
|
}
|
|
|
|
|
|
|
|
limb_t mp_sub_ui(limb_t *tab, limb_t b, mp_size_t n)
|
|
|
|
{
|
|
|
|
mp_size_t i;
|
|
|
|
limb_t k, a, v;
|
|
|
|
|
|
|
|
k=b;
|
|
|
|
for(i=0;i<n;i++) {
|
|
|
|
v = tab[i];
|
|
|
|
a = v - k;
|
|
|
|
k = a > v;
|
|
|
|
tab[i] = a;
|
|
|
|
if (k == 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return k;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* r = (a + high*B^n) >> shift. Return the remainder r (0 <= r < 2^shift).
|
|
|
|
1 <= shift <= LIMB_BITS - 1 */
|
|
|
|
static limb_t mp_shr(limb_t *tab_r, const limb_t *tab, mp_size_t n,
|
|
|
|
int shift, limb_t high)
|
|
|
|
{
|
|
|
|
mp_size_t i;
|
|
|
|
limb_t l, a;
|
|
|
|
|
|
|
|
assert(shift >= 1 && shift < LIMB_BITS);
|
|
|
|
l = high;
|
|
|
|
for(i = n - 1; i >= 0; i--) {
|
|
|
|
a = tab[i];
|
|
|
|
tab_r[i] = (a >> shift) | (l << (LIMB_BITS - shift));
|
|
|
|
l = a;
|
|
|
|
}
|
|
|
|
return l & (((limb_t)1 << shift) - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* tabr[] = taba[] * b + l. Return the high carry */
|
|
|
|
static limb_t mp_mul1(limb_t *tabr, const limb_t *taba, limb_t n,
|
|
|
|
limb_t b, limb_t l)
|
|
|
|
{
|
|
|
|
limb_t i;
|
|
|
|
dlimb_t t;
|
|
|
|
|
|
|
|
for(i = 0; i < n; i++) {
|
|
|
|
t = (dlimb_t)taba[i] * (dlimb_t)b + l;
|
|
|
|
|