APyFloat

class APyFloat

Public Functions

template<typename RANDOM_ACCESS_ITERATOR>
inline void copy_n_from(RANDOM_ACCESS_ITERATOR src_it, std::size_t n) noexcept

Copy n items from src_it into *this.

template<typename RANDOM_ACCESS_ITERATOR>
inline void copy_n_to(RANDOM_ACCESS_ITERATOR dst_it, std::size_t n) const noexcept

Copy n items from *this into dst_it.

inline bool same_type_as(const APyFloat &other) const

Test if two floating-point numbers have the same bit specifiers.

inline APyFloatSpec spec() const noexcept

Retrieve the bit specification.

explicit APyFloat(bool sign, exp_t exp, man_t man, std::uint8_t exp_bits, std::uint8_t man_bits, std::optional<exp_t> bias = std::nullopt)

Constructor with optional bias, all fields are set. If no bias is given, an IEEE-like bias will be used.

APyFloat(bool sign, exp_t exp, man_t man, std::uint8_t exp_bits, std::uint8_t man_bits, exp_t bias)

Constructor setting all fields, no optionals.

APyFloat(const APyFloatData &data, std::uint8_t exp_bits, std::uint8_t man_bits, exp_t bias)

Constructor setting the data fields using a struct. Mostly used by the APyFloatArray class.

APyFloat(std::uint8_t exp_bits, std::uint8_t man_bits, std::optional<exp_t> bias = std::nullopt)

Constructor where only the format is specified, with an optional bias. Data fields are initialized to zero. If no bias is given, an IEEE-like bias will be used.

APyFloat(std::uint8_t exp_bits, std::uint8_t man_bits, exp_t bias)

Constructor only specifying the format, data fields are initialized to zero.

double to_double() const

Cast to double.

operator double() const
nanobind::int_ to_bits() const

Convert the underlying bit pattern to a Python long integer.

APyFloat cast(std::optional<int> exp_bits, std::optional<int> man_bits, std::optional<exp_t> bias = std::nullopt, std::optional<QuantizationMode> quantization = std::nullopt) const

Cast method exposed to Python.

APyFloat _cast(std::uint8_t exp_bits, std::uint8_t man_bits, exp_t bias, std::optional<QuantizationMode> quantization = std::nullopt) const

Internal cast method when format is given fully.

APyFloat _cast(std::uint8_t exp_bits, std::uint8_t man_bits, exp_t bias, QuantizationMode quantization) const

Internal cast method when the format and quantization mode are given.

APyFloat _checked_cast(std::uint8_t exp_bits, std::uint8_t man_bits, exp_t bias, QuantizationMode quantization) const

Core cast method when it is known that the bit widths are not the same.

void cast_mantissa(std::uint8_t man_bits, QuantizationMode quantization)

Change the number of mantissa bits. The number is assumed not to be NaN or Inf. The exponent is updated in case of carry.

void cast_mantissa_shorter(std::uint8_t man_bits, QuantizationMode quantization)

Decrease (not increase) the number of mantissa bits. The number is assumed not to be NaN or Inf. The exponent is updated in case of carry.

void cast_mantissa_subnormal(std::uint8_t man_bits_delta, QuantizationMode quantization)

Change the number of mantissa bits. The number is assumed not to be NaN or Inf. The exponent is assumed to be 0 and is updated in case of carry.

APyFloat cast_no_quant(std::uint8_t exp_bits, std::uint8_t man_bits, std::optional<exp_t> bias = std::nullopt) const

Cast to a larger format.

Change the number of mantissa and exponent bits for cases where it is known that quantization does not happen, i.e., the resulting number of bits is not shorter.

APyFloat cast_no_quant(std::uint8_t exp_bits, std::uint8_t man_bits, exp_t bias) const

Change the number of mantissa and exponent bits for cases where it is known that quantization does not happen, i.e., the resulting number of bits is not shorter.

APyFloat cast_from_double(std::uint8_t exp_bits, std::uint8_t man_bits, exp_t bias) const

Simplified casting when the input is known to correspond to a double. Values that are too large become infinity; they do not saturate, regardless of the quantization mode.

APyFloat _cast_to_double() const

Simplified casting when it is known that the result will correspond to a double.

std::string str() const

String representation.

std::string repr() const

Python representation.

std::string latex() const

LaTeX representation.

APyFloat operator+(const APyFloat &rhs) const
APyFloat operator-(const APyFloat &rhs) const
APyFloat operator-() const
APyFloat operator*(const APyFloat &rhs) const
APyFloat operator/(const APyFloat &rhs) const
APyFloat &operator+=(const APyFloat &rhs)
APyFloat operator&(const APyFloat &rhs) const
APyFloat operator|(const APyFloat &rhs) const
APyFloat operator^(const APyFloat &rhs) const
APyFloat operator~() const
APyFloat abs() const

Absolute value.

bool operator==(const APyFloat &rhs) const
bool operator!=(const APyFloat &rhs) const
bool operator<=(const APyFloat &rhs) const
bool operator<(const APyFloat &rhs) const
bool operator>=(const APyFloat &rhs) const
bool operator>(const APyFloat &rhs) const
bool operator==(const double rhs) const
bool operator!=(const double rhs) const
bool operator<=(const double rhs) const
bool operator<(const double rhs) const
bool operator>=(const double rhs) const
bool operator>(const double rhs) const
bool operator==(const APyFixed &rhs) const
bool operator!=(const APyFixed &rhs) const
bool operator<=(const APyFixed &rhs) const
bool operator<(const APyFixed &rhs) const
bool operator>=(const APyFixed &rhs) const
bool operator>(const APyFixed &rhs) const
inline bool is_normal() const

True if and only if value is normal (not zero, subnormal, infinite, or NaN).

inline bool is_finite() const

True if and only if value is zero, subnormal, or normal.

inline bool is_subnormal() const

True if and only if value is subnormal.

inline bool is_zero() const

True if and only if value is zero.

inline bool is_nan() const

True if and only if value is NaN.

inline bool is_inf() const

True if and only if value is infinite.

inline bool is_max_exponent() const

True if and only if value is infinite or NaN.

inline bool is_zero_exponent() const

True if and only if value is zero or subnormal.

inline bool get_sign() const

Return the stored sign.

inline man_t get_man() const

Return the stored mantissa, i.e. without leading one.

inline exp_t get_exp() const

Return the stored exponent, i.e. with bias added.

inline exp_t get_bias() const

Return the bias.

inline std::uint8_t get_man_bits() const

Return the bit width of the mantissa field.

inline std::uint8_t get_exp_bits() const

Return the bit width of the exponent field.

inline std::uint8_t get_bits() const

Return the bit width of the entire floating-point format.

inline APyFloatData get_data() const noexcept

Return all data fields packed in a struct.

inline man_t true_man() const

Return the mantissa with potential leading one.

inline std::int64_t true_exp() const

Return the true exponent, i.e. without the bias.

inline void set_data(const APyFloatData &data)

Set the data fields using a struct.

inline void set_sign(bool new_sign)

Set the sign.

inline void set_to_zero(bool new_sign)

Set floating-point object to positive or negative zero.

inline void set_to_zero()

Set floating-point object to zero.

inline void set_to_inf()

Set floating-point object to infinity.

inline void set_to_nan()

Set floating-point object to NaN.

APyFloat construct_zero(std::optional<bool> new_sign = std::nullopt) const

Factory method for creating a floating-point object in the same format with a value of positive or negative zero.

APyFloat construct_inf(std::optional<bool> new_sign = std::nullopt) const

Factory method for creating a floating-point object in the same format with a value of positive or negative infinity.

APyFloat construct_nan(std::optional<bool> new_sign = std::nullopt, man_t payload = 1) const

Factory method for creating a floating-point object in the same format with a value of NaN.

APyFloat &update_from_bits(nb::int_ python_long_int_bit_pattern)

Update data fields based on a bit pattern.

APyFloat &update_from_bits(std::uint64_t bits)

Update data fields based on a bit pattern.

bool is_identical(const APyFloat &other) const

Test if two floating-point numbers are identical, i.e., have the same value and the same format.

APyFixed to_fixed() const

Convert to a fixed-point object.

APyFloat cast_to_double(std::optional<QuantizationMode> quantization = std::nullopt) const

Convenience method when the target format is known to correspond to a double-precision floating-point format.

APyFloat cast_to_single(std::optional<QuantizationMode> quantization = std::nullopt) const

Convenience method when the target format is known to correspond to a single-precision floating-point format.

APyFloat cast_to_half(std::optional<QuantizationMode> quantization = std::nullopt) const

Convenience method when the target format is known to correspond to a half-precision floating-point format.

APyFloat cast_to_bfloat16(std::optional<QuantizationMode> quantization = std::nullopt) const

Convenience method when the target format is known to correspond to a 16-bit brain float.

APyFloat next_up() const

Get the smallest floating-point number in the same format that compares greater.

APyFloat next_down() const

Get the largest floating-point number in the same format that compares less.

inline exp_t exp_mask() const

Create a bit mask for the exponent field.

inline exp_t max_exponent() const

Calculate the maximum value for the exponent field.

inline exp_t ieee_bias() const

Calculate the IEEE-like bias for this particular format.

inline man_t man_mask() const

Create a bit mask for the mantissa field.

inline man_t leading_one() const

Create a bit mask for the leading one.

APyFloat normalized() const

Create a floating-point object that is normalized.

Add mantissa bits so that a number is no longer subnormal.

Public Static Functions

static void create_in_place(APyFloat *apyfloat, int sign, exp_t exp, man_t man, int exp_bits, int man_bits, std::optional<exp_t> bias = std::nullopt)

Factory function for Python interface.

static APyFloat from_number(const nb::object &py_obj, int exp_bits, int man_bits, std::optional<exp_t> bias = std::nullopt)

Create APyFloat from Python object.

static APyFloat from_double(double value, int exp_bits, int man_bits, std::optional<exp_t> bias = std::nullopt)

Create APyFloat from double.

static APyFloat from_integer(const nb::int_ value, int exp_bits, int man_bits, std::optional<exp_t> bias = std::nullopt)

Create APyFloat from Python integer.

static APyFloat from_fixed(APyFixed value, int exp_bits, int man_bits, std::optional<exp_t> bias = std::nullopt)

Create APyFloat from APyFixed.

static APyFloat from_bits(nanobind::int_ python_long_int_bit_pattern, int exp_bits, int man_bits, std::optional<exp_t> bias = std::nullopt)

Create APyFloat from bit-representation.

static APyFloat one(std::uint8_t exp_bits, std::uint8_t man_bits, std::optional<exp_t> bias = std::nullopt)

Create an APyFloat with the stored value one.

Create a floating-point object with the value one.

static APyFloat pow(const APyFloat &x, const APyFloat &y)

Power function with another APyFloat as the exponent.

static APyFloat pown(const APyFloat &x, int n)

Power function with integer exponent.

static inline exp_t ieee_bias(std::uint8_t exp_bits)

Calculate the IEEE-like bias based on the number of exponent bits.

Friends

friend class APyArray