`apyfloat_util.h`¶

Defines

GET_QNTZ_FUNC_ALTERNATIVE(Q, SUPPORT)¶

Typedefs

template<std::size_t SRC1_INC = 1, std::size_t SRC2_INC = 1, std::size_t DST_INC = 1> using FloatingPointAdder = _FloatingPointAddSub<false, SRC1_INC, SRC2_INC, DST_INC>¶

template<std::size_t SRC1_INC = 1, std::size_t SRC2_INC = 1, std::size_t DST_INC = 1> using FloatingPointSubtractor = _FloatingPointAddSub<true, SRC1_INC, SRC2_INC, DST_INC>¶

Functions

static inline bool is_zero(const APyFloatData &src)¶

static inline bool is_max_exponent(const APyFloatData &src, std::uint8_t exp_bits)¶

static inline bool is_max_exponent(const APyFloatData &src, const APyFloatSpec &spec)¶

static inline bool is_normal(const APyFloatData &src, std::uint8_t exp_bits)¶

static inline bool is_nan(const APyFloatData &src, std::uint8_t exp_bits)¶

static inline bool is_nan(const APyFloatData &src, const APyFloatSpec &spec)¶

static inline bool is_inf(const APyFloatData &src, std::uint8_t exp_bits)¶

static inline bool is_inf(const APyFloatData &src, const APyFloatSpec &spec)¶

static inline bool is_finite(const APyFloatData &src, const APyFloatSpec &spec)¶

static inline std::int64_t true_exp(const APyFloatData &src, exp_t bias)¶

static inline std::int64_t true_exp(const APyFloatData &src, const APyFloatSpec &spec)¶

static inline man_t true_man(const APyFloatData &src, std::uint8_t exp_bits, std::uint8_t man_bits)¶

static inline man_t true_man(const APyFloatData &src, const APyFloatSpec &spec)¶

static inline std::tuple<std::int64_t, std::size_t> pure_exp(const APyFloatData &src, const APyFloatSpec &spec)¶: The pure exponent (pure_exp) is the bit index of the first non-zero bit in a floating-point number, if it exists. It can be thought of as the exponent, in base 2, for the number written using scientific notation. The pure exponent is equal to the true biased exponent for all normal numbers.

static inline std::tuple<APyFloatData, std::uint8_t, exp_t> normalize(const APyFloatData &src, std::uint8_t exp_bits, std::uint8_t man_bits, exp_t bias)¶: Return a normalized (non subnormal) floating-point copy of src. Returns a three-tuple, [ APyFloatData, exp_bits, bias ]

static inline std::tuple<APyFloatData, std::uint8_t, exp_t> normalize(const APyFloatData &src, const APyFloatSpec &spec)¶

static inline exp_t ieee_bias(std::uint8_t exp_bits)¶: Retrieve a generalized IEEE-754 bias for exp_bits

static inline exp_t calc_bias(int new_exp_bits, int exp_bits1, exp_t bias1, int exp_bits2, exp_t bias2)¶: Calculate new bias. Assumes new_exp_bits is larger than exp_bits1 and exp_bits2.

static inline exp_t calc_bias(int new_exp_bits, const APyFloatSpec &spec1, const APyFloatSpec &spec2)¶: Calculate new bias. Assumes new_exp_bits is larger than exp_bits1 and exp_bits2.

static inline bool do_infinity(QuantizationMode mode, bool sign)¶: Test if a quantization mode shall saturate to infinity or greatest normal number.

template<QuantizationMode QNTZ, bool SUPPORT_NEGATIVE_BITS_TO_QUANTIZE = false> static inline auto _qntz_func(man_t &man, exp_t &exp, exp_t max_exp, int bits_to_quantize, bool sign, man_t man_msb_constant, man_t sticky_constant)¶: Specialized floating-point mantissa quantization functions.

static inline auto get_qntz_func(QuantizationMode qntz, bool support_negative_bits_to_quantize = false)¶: Retrieve a specialized quantization function.

static inline man_t quantize_close_to_zero(bool sign, QuantizationMode quantization)¶: Quantize mantissa when the result is guaranteed to be either 0 or 1. Quantization mode STOCH_WEIGHTED should be used with this function.

template<typename QNTZ_FUNC_SIGNATURE> static inline APyFloatData floating_point_cast(const APyFloatData &src, const APyFloatSpec &src_spec, const APyFloatSpec &dst_spec, QuantizationMode qntz, QNTZ_FUNC_SIGNATURE qntz_func)¶: Cast a floating-point value from one format to another.

template<typename QNTZ_FUNC_SIGNATURE> static inline APyFloatData array_floating_point_cast_pos_man_delta(const APyFloatData &src, const APyFloatSpec &src_spec, const APyFloatSpec &dst_spec, QuantizationMode qntz, QNTZ_FUNC_SIGNATURE qntz_func, const exp_t SRC_MAX_EXP, const exp_t DST_MAX_EXP, const man_t SRC_LEADING_ONE, const man_t DST_LEADING_ONE, const int SPEC_MAN_BITS_DELTA, const man_t SRC_HIDDEN_ONE, const std::int64_t BIAS_DELTA)¶: Cast a floating-point value from one format to another using many pre-computed constants making it suitable for arrays. Specialized for the case where more mantissa bits are used in the result.

template<typename QNTZ_FUNC_SIGNATURE> static inline APyFloatData array_floating_point_cast_neg_man_delta(const APyFloatData &src, const APyFloatSpec &src_spec, const APyFloatSpec &dst_spec, QuantizationMode qntz, QNTZ_FUNC_SIGNATURE qntz_func, const exp_t SRC_MAX_EXP, const exp_t DST_MAX_EXP, const man_t SRC_LEADING_ONE, const man_t DST_LEADING_ONE, const int SPEC_MAN_BITS_DELTA_REV, const man_t SRC_HIDDEN_ONE, const man_t FINAL_STICKY, const std::int64_t BIAS_DELTA)¶: Cast a floating-point value from one format to another using many pre-computed constants making it suitable for arrays. Specialized for the case where fewer mantissa bits are used in the result.

static inline APyFloatData floating_point_cast_no_quant(const APyFloatData &src, const APyFloatSpec &src_spec, const APyFloatSpec &dst_spec)¶: Cast a floating-point number when it is known that no quantization happens.

static inline APyFloatData floating_point_cast_no_quant_no_max_exp(const APyFloatData &src, const std::uint8_t src_man_bits, const std::uint8_t dst_man_bits)¶: Cast a floating-point number when it is known that no quantization happens and that the exponent is not max. In addition, src_spec.bias = dst_spec.bias.

static inline APyFloatData array_floating_point_cast_no_quant(const APyFloatData &src, const APyFloatSpec &src_spec, const exp_t SRC_MAX_EXP, const exp_t DST_MAX_EXP, const int SPEC_MAN_BITS_DELTA, const std::int64_t BIAS_DELTA)¶: Cast a floating-point number when it is known that no quantization happens using pre-computed values suitable for arrays

static inline std::uint64_t to_bits_uint64(const APyFloatData &d, std::uint8_t exp_bits, std::uint8_t man_bits)¶: Return the bit pattern of a floating-point data field. No checks on bit width are done.

static inline nb::int_ apyfloat_to_bits(const APyFloatData &d, std::uint8_t exp_bits, std::uint8_t man_bits)¶

static inline man_t ipow(man_t base, unsigned int n)¶: Fast integer power by squaring.

static inline unsigned int leading_zeros_apyfixed(const APyFixed &fx)¶: Get the number of left shifts needed to make fx>=1.0.

static inline QuantizationMode translate_quantization_mode(QuantizationMode quantization, bool sign)¶: Translate the quantization mode for floating-point to the fixed-point equivalent. This is used for the mantissa so the sign must be taken into account.

static inline void quantize_apymantissa(APyFixed &apyman, bool sign, int bits, QuantizationMode quantization)¶: Quantize mantissa stored as an APyFixed

static inline std::uint8_t check_exponent_format(int exp_bits, std::string_view exception_msg_prefix = "apytypes")¶: Check that the number of exponent bits is allowed, throw otherwise.

static inline std::uint8_t check_mantissa_format(int man_bits, std::string_view exception_msg_prefix = "apytypes")¶: Check that the number of mantissa bits is allowed, throw otherwise.

template<typename RANDOM_ACCESS_ITERATOR_IN> APyFloatData floating_point_from_fixed_point(RANDOM_ACCESS_ITERATOR_IN cbegin_it, RANDOM_ACCESS_ITERATOR_IN cend_it, int bits, int int_bits, int exp_bits, int man_bits, exp_t bias, QuantizationMode q_mode = QuantizationMode::RND_CONV)¶

static inline double floating_point_to_double(const APyFloatData &data_in, const APyFloatSpec &spec)¶

template<typename QNTZ_FUNC_SIGNATURE> static inline APyFloatData floating_point_scalbn(const APyFloatData &src, const APyFloatSpec &src_spec, int exp, QuantizationMode qntz, QNTZ_FUNC_SIGNATURE qntz_func)¶: Multiply a floating-point value (data of spec) with 2^(exp) efficiently. Correct rounding, as specified by qntz and qntz_func, is guaranteed on floating-point underflow. See cppreference for scalbn: https://en.cppreference.com/w/c/numeric/math/scalbn

static inline bool floating_point_less_than(const APyFloatData &src1, const APyFloatSpec &src1_spec, const APyFloatData &src2, const APyFloatSpec &src2_spec)¶: Iterator-based less-than function, comparing src1 < src2

static inline bool floating_point_less_than_abs(const APyFloatData &src1, const APyFloatSpec &src1_spec, const APyFloatData &src2, const APyFloatSpec &src2_spec)¶: Iterator-based less-than function, comparing abs(src1) < abs(src2)

static inline bool floating_point_less_than_abs_same_wl(const APyFloatData &src1, const APyFloatData &src2)¶: Memory-based absolute value less-than function, comparing abs(src1) < abs(src2) when src1 and src2 have the same bit specifiers. This method assumes that neither src1 nor src2 are NaN or inf.

template<const bool SUB = false, typename RANDOM_ACCESS_ITERATOR_IN1, typename RANDOM_ACCESS_ITERATOR_IN2, typename RANDOM_ACCESS_ITERATOR_INOUT, typename QNTZ_FUNC_SIGNATURE> static inline void _floating_point_add_same_wl(RANDOM_ACCESS_ITERATOR_IN1 src1, RANDOM_ACCESS_ITERATOR_IN2 src2, RANDOM_ACCESS_ITERATOR_INOUT dst, const APyFloatSpec &spec, const QuantizationMode &qntz, QNTZ_FUNC_SIGNATURE qntz_func, const exp_t MAX_EXP, const man_t FINAL_RES_LO, const man_t RES_LO, const man_t CARRY_RES_LO, const man_t MAN_MASK, const unsigned NORMALIZATION_CONST)¶

Floating-point addition of src1 and src2 when they share the exact word length (exp_bits, man_bits, and bias). To use this function, all of the following must be satisfied:

spec.man_bits + 5 <= _MAN_T_SIZE_BITS
qntz != QuantizationMode::STOCH_WEIGHTED

template<const bool SUB = false, typename RANDOM_ACCESS_ITERATOR_IN1, typename RANDOM_ACCESS_ITERATOR_IN2, typename RANDOM_ACCESS_ITERATOR_INOUT, typename QNTZ_FUNC_SIGNATURE> static inline void _floating_point_add_diff_wl(RANDOM_ACCESS_ITERATOR_IN1 src1, RANDOM_ACCESS_ITERATOR_IN2 src2, RANDOM_ACCESS_ITERATOR_INOUT dst, const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz, QNTZ_FUNC_SIGNATURE qntz_func, const exp_t RES_MAX_EXP, const man_t FINAL_RES_LO, const man_t RES_LO, const man_t CARRY_RES_LO, const man_t MAN_MASK, const unsigned NORMALIZATION_CONST)¶

Floating-point addition of src1 and src2 when they don’t share word lengths (any of: exp_bits, man_bits, and bias). To use this function, all of the following must be satisfied:

dst_spec.man_bits + 5 <= _MAN_T_SIZE_BITS
qntz != QuantizationMode::STOCH_WEIGHTED

template<const bool SUB = false, typename RANDOM_ACCESS_ITERATOR_IN1, typename RANDOM_ACCESS_ITERATOR_IN2, typename RANDOM_ACCESS_ITERATOR_INOUT> static inline void _floating_point_add_general(RANDOM_ACCESS_ITERATOR_IN1 src1, RANDOM_ACCESS_ITERATOR_IN2 src2, RANDOM_ACCESS_ITERATOR_INOUT dst, const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz, const exp_t RES_MAX_EXP)¶: Floating-point addition of src1 and src2 for when _floating_point_add_same_wl or _floating_point_add_diff_wl can’t be used. Works in all cases, but is the slowest.

template<typename RANDOM_ACCESS_ITERATOR_IN1, typename RANDOM_ACCESS_ITERATOR_IN2, typename RANDOM_ACCESS_ITERATOR_INOUT, typename QNTZ_FUNC_SIGNATURE> static inline void _floating_point_mul_short(RANDOM_ACCESS_ITERATOR_IN1 src1, RANDOM_ACCESS_ITERATOR_IN2 src2, RANDOM_ACCESS_ITERATOR_INOUT dst, const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz, QNTZ_FUNC_SIGNATURE qntz_func, const unsigned SUM_MAN_BITS, const exp_t SRC1_MAX_EXP, const exp_t SRC2_MAX_EXP, const exp_t RES_MAX_EXP, const man_t TWO, const man_t TWO_BEFORE, const man_t ONE_BEFORE, const man_t TWO_RES, const int MAN_DELTA, const man_t STICKY, const int64_t BIAS_TERM)¶: Floating-point multiplication of src1 and src2 for when the mantissa product fits in a single limb, i.e., src1_spec.man_bits + src2_spec.man_bits <= _MAN_LIMIT_BITS.

template<typename RANDOM_ACCESS_ITERATOR_IN1, typename RANDOM_ACCESS_ITERATOR_IN2, typename RANDOM_ACCESS_ITERATOR_INOUT> static inline void _floating_point_mul_general(RANDOM_ACCESS_ITERATOR_IN1 src1, RANDOM_ACCESS_ITERATOR_IN2 src2, RANDOM_ACCESS_ITERATOR_INOUT dst, const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz, const exp_t SRC1_MAX_EXP, const exp_t SRC2_MAX_EXP, const exp_t RES_MAX_EXP)¶: Floating-point multiplication of src1 and src2. This is the most general low-level multiplication function to use only when others won’t work.

template<const std::size_t SRC1_INC = 1, const std::size_t SRC2_INC = 1, const std::size_t DST_INC = 1, typename RANDOM_ACCESS_ITERATOR_IN1, typename RANDOM_ACCESS_ITERATOR_IN2, typename RANDOM_ACCESS_ITERATOR_INOUT> static inline void floating_point_quotients(RANDOM_ACCESS_ITERATOR_IN1 src1, RANDOM_ACCESS_ITERATOR_IN2 src2, RANDOM_ACCESS_ITERATOR_INOUT dst, const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const std::size_t n_elements, const QuantizationMode &qntz, const exp_t RES_MAX_EXP)¶: Iterator-based floating-point quotients.

Variables

static constexpr std::size_t _MAN_T_SIZE_BYTES = sizeof(man_t)¶: Sizes of APyFloat datatypes

static constexpr std::size_t _MAN_T_SIZE_BITS = 8 * _MAN_T_SIZE_BYTES ¶

static constexpr std::size_t _EXP_T_SIZE_BYTES = sizeof(exp_t)¶

static constexpr std::size_t _EXP_T_SIZE_BITS = 8 * _EXP_T_SIZE_BYTES ¶

static constexpr std::size_t _MAN_LIMIT_BITS = _MAN_T_SIZE_BITS - 3¶: APyFloat word length limits.

static constexpr std::size_t _EXP_LIMIT_BITS = _EXP_T_SIZE_BITS - 2¶

class _FloatingPointMultiplierShort¶

#include <apyfloat_util.h>

Short floating-point multiplication functor. Available when: src1_spec.man_bits + src2_spec.man_bits <= _MAN_LIMIT_BITS

Public Functions

inline explicit _FloatingPointMultiplierShort()¶

inline explicit _FloatingPointMultiplierShort(const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz)¶

_FloatingPointMultiplierShort &operator=(const _FloatingPointMultiplierShort&) = default¶

inline void operator()(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst) const¶

Private Members

APyFloatSpec src1_spec¶

APyFloatSpec src2_spec¶

APyFloatSpec dst_spec¶

QuantizationMode qntz¶

decltype(get_qntz_func(qntz)) qntz_func¶

unsigned SUM_MAN_BITS¶

int MAN_DELTA¶

exp_t SRC1_MAX_EXP¶

exp_t SRC2_MAX_EXP¶

exp_t RES_MAX_EXP¶

man_t TWO¶

man_t TWO_BEFORE¶

man_t ONE_BEFORE¶

man_t TWO_RES¶

man_t STICKY¶

std::int64_t BIAS_TERM¶

class _FloatingPointMultiplierGeneral¶

#include <apyfloat_util.h>

General floating-point multiplication, always available but slower compared to _FloatingPointMultiplierShort.

Public Functions

inline explicit _FloatingPointMultiplierGeneral()¶

inline explicit _FloatingPointMultiplierGeneral(const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz)¶

inline void operator()(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst) const¶

Private Members

APyFloatSpec src1_spec¶

APyFloatSpec src2_spec¶

APyFloatSpec dst_spec¶

QuantizationMode qntz¶

decltype(get_qntz_func(qntz)) qntz_func¶

exp_t SRC1_MAX_EXP¶

exp_t SRC2_MAX_EXP¶

exp_t RES_MAX_EXP¶

template<bool IS_SUBTRACT> class _FloatingPointAddSubSameWl¶

#include <apyfloat_util.h>

Floating-point adder/subtractor that works when: (1) qntz != QuantizationMode::STOCH_WEIGHTED, (2) dst_spec.man_bits + 5 <= _MAN_T_SIZE_BITS, (3) src1_spec == src2_spec == dst_spec.

Public Functions

inline explicit _FloatingPointAddSubSameWl()¶

inline explicit _FloatingPointAddSubSameWl(const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz)¶

inline void operator()(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst) const¶

Private Members

APyFloatSpec dst_spec¶

QuantizationMode qntz¶

decltype(get_qntz_func(qntz)) qntz_func¶

exp_t RES_MAX_EXP¶

man_t FINAL_RES_LO¶

man_t RES_LO¶

man_t CARRY_RES_LO¶

man_t MAN_MASK¶

unsigned NORM_CONST¶

template<bool IS_SUBTRACT> class _FloatingPointAddSubDiffWl¶

#include <apyfloat_util.h>

Floating-point adder/subtractor that works when: (1) qntz != QuantizationMode::STOCH_WEIGHTED, (2) dst_spec.man_bits + 5 <= _MAN_T_SIZE_BITS.

Public Functions

inline explicit _FloatingPointAddSubDiffWl()¶

inline explicit _FloatingPointAddSubDiffWl(const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz)¶

inline void operator()(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst) const¶

Private Members

APyFloatSpec src1_spec¶

APyFloatSpec src2_spec¶

APyFloatSpec dst_spec¶

QuantizationMode qntz¶

decltype(get_qntz_func(qntz)) qntz_func¶

exp_t RES_MAX_EXP¶

man_t FINAL_RES_LO¶

man_t RES_LO¶

man_t CARRY_RES_LO¶

man_t MAN_MASK¶

unsigned NORM_CONST¶

template<bool IS_SUBTRACT> class _FloatingPointAddSubGeneral¶

#include <apyfloat_util.h>

Floating-point adder/subtractor that always works. It is slower compared to _FloatingPointAddSubSameWl and _FloatingPointAddSubDiffWl.

Public Functions

inline explicit _FloatingPointAddSubGeneral()¶

inline explicit _FloatingPointAddSubGeneral(const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz)¶

inline void operator()(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst) const¶

Private Members

APyFloatSpec src1_spec¶

APyFloatSpec src2_spec¶

APyFloatSpec dst_spec¶

QuantizationMode qntz¶

exp_t RES_MAX_EXP¶

template<bool IS_SUBTRACT, std::size_t SRC1_INC, std::size_t SRC2_INC, std::size_t DST_INC> class _FloatingPointAddSub¶

Public Functions

inline explicit _FloatingPointAddSub(const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz)¶

inline void operator()(const APyFloatData &src1, const APyFloatData &src2, APyFloatData &dst) const¶

inline void operator()(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems = 1) const¶

Private Functions

inline void add_general(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems) const¶

inline void add_same_wl(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems) const¶

inline void add_diff_wl(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems) const¶

Private Members

_FloatingPointAddSubSameWl<IS_SUBTRACT> _add_same_wl¶

_FloatingPointAddSubDiffWl<IS_SUBTRACT> _add_diff_wl¶

_FloatingPointAddSubGeneral<IS_SUBTRACT> _add_general¶

void (_FloatingPointAddSub::* f)(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems) const¶

template<std::size_t SRC1_INC = 1, std::size_t SRC2_INC = 1, std::size_t DST_INC = 1> class FloatingPointMultiplier¶

Public Functions

inline explicit FloatingPointMultiplier(const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz)¶

inline void operator()(const APyFloatData &src1, const APyFloatData &src2, APyFloatData &dst) const¶

inline void operator()(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems = 1) const¶

Private Functions

inline void mul_short(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems) const¶

inline void mul_general(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems) const¶

Private Members

_FloatingPointMultiplierShort _mul_short¶

_FloatingPointMultiplierGeneral _mul_general¶

void (FloatingPointMultiplier::* f)(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems) const¶

template<std::size_t SRC1_INC = 1, std::size_t SRC2_INC = 1, std::size_t DST_INC = 1> class FloatingPointDivider¶

Public Functions

inline explicit FloatingPointDivider(const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz)¶

inline void operator()(const APyFloatData &src1, const APyFloatData &src2, APyFloatData &dst) const¶

inline void operator()(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t nitems = 1) const¶

Private Members

const APyFloatSpec src1_spec¶

const APyFloatSpec src2_spec¶

const APyFloatSpec dst_spec¶

const QuantizationMode qntz¶

exp_t RES_MAX_EXP¶

class FloatingPointInnerProduct¶

Public Functions

inline explicit FloatingPointInnerProduct(const APyFloatSpec &src1_spec, const APyFloatSpec &src2_spec, const APyFloatSpec &dst_spec, const QuantizationMode &qntz)¶

inline void operator()(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t N, std::size_t M = 1, std::size_t DST_STEP = 1) const¶

template<bool SHORT_MUL, auto SHORT_ADD> inline void inner_product(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t N, std::size_t M, std::size_t DST_STEP) const¶

Private Members

void (FloatingPointInnerProduct::* f)(const APyFloatData *src1, const APyFloatData *src2, APyFloatData *dst, std::size_t N, std::size_t M, std::size_t DST_STEP) const¶

_FloatingPointAddSubSameWl<false> add_same_wl¶

_FloatingPointAddSubGeneral<false> add_general¶

_FloatingPointMultiplierShort mul_short¶

_FloatingPointMultiplierGeneral mul_general¶

apyfloat_util.h¶

`apyfloat_util.h`¶